From 871480933a1c28f8a9fed4c4d34d06c439a7a422 Mon Sep 17 00:00:00 2001 From: Srikant Patnaik Date: Sun, 11 Jan 2015 12:28:04 +0530 Subject: Moved, renamed, and deleted files The original directory structure was scattered and unorganized. Changes are basically to make it look like kernel structure. --- drivers/infiniband/hw/amso1100/Kbuild | 6 + drivers/infiniband/hw/amso1100/Kconfig | 15 + drivers/infiniband/hw/amso1100/c2.c | 1253 ++++ drivers/infiniband/hw/amso1100/c2.h | 548 ++ drivers/infiniband/hw/amso1100/c2_ae.c | 324 + drivers/infiniband/hw/amso1100/c2_ae.h | 108 + drivers/infiniband/hw/amso1100/c2_alloc.c | 142 + drivers/infiniband/hw/amso1100/c2_cm.c | 453 ++ drivers/infiniband/hw/amso1100/c2_cq.c | 437 ++ drivers/infiniband/hw/amso1100/c2_intr.c | 218 + drivers/infiniband/hw/amso1100/c2_mm.c | 377 + drivers/infiniband/hw/amso1100/c2_mq.c | 174 + drivers/infiniband/hw/amso1100/c2_mq.h | 106 + drivers/infiniband/hw/amso1100/c2_pd.c | 90 + drivers/infiniband/hw/amso1100/c2_provider.c | 887 +++ drivers/infiniband/hw/amso1100/c2_provider.h | 182 + drivers/infiniband/hw/amso1100/c2_qp.c | 1022 +++ drivers/infiniband/hw/amso1100/c2_rnic.c | 654 ++ drivers/infiniband/hw/amso1100/c2_status.h | 158 + drivers/infiniband/hw/amso1100/c2_user.h | 82 + drivers/infiniband/hw/amso1100/c2_vq.c | 260 + drivers/infiniband/hw/amso1100/c2_vq.h | 63 + drivers/infiniband/hw/amso1100/c2_wr.h | 1520 ++++ drivers/infiniband/hw/cxgb3/Kconfig | 27 + drivers/infiniband/hw/cxgb3/Makefile | 8 + drivers/infiniband/hw/cxgb3/cxio_dbg.c | 207 + drivers/infiniband/hw/cxgb3/cxio_hal.c | 1345 ++++ drivers/infiniband/hw/cxgb3/cxio_hal.h | 211 + drivers/infiniband/hw/cxgb3/cxio_resource.c | 343 + drivers/infiniband/hw/cxgb3/cxio_resource.h | 69 + drivers/infiniband/hw/cxgb3/cxio_wr.h | 802 ++ drivers/infiniband/hw/cxgb3/iwch.c | 292 + drivers/infiniband/hw/cxgb3/iwch.h | 182 + drivers/infiniband/hw/cxgb3/iwch_cm.c | 2254 ++++++ drivers/infiniband/hw/cxgb3/iwch_cm.h | 233 + drivers/infiniband/hw/cxgb3/iwch_cq.c | 233 + drivers/infiniband/hw/cxgb3/iwch_ev.c | 232 + drivers/infiniband/hw/cxgb3/iwch_mem.c | 203 + drivers/infiniband/hw/cxgb3/iwch_provider.c | 1468 ++++ drivers/infiniband/hw/cxgb3/iwch_provider.h | 360 + drivers/infiniband/hw/cxgb3/iwch_qp.c | 1161 +++ drivers/infiniband/hw/cxgb3/iwch_user.h | 74 + drivers/infiniband/hw/cxgb3/tcb.h | 632 ++ drivers/infiniband/hw/cxgb4/Kconfig | 18 + drivers/infiniband/hw/cxgb4/Makefile | 5 + drivers/infiniband/hw/cxgb4/cm.c | 2722 +++++++ drivers/infiniband/hw/cxgb4/cq.c | 898 +++ drivers/infiniband/hw/cxgb4/device.c | 630 ++ drivers/infiniband/hw/cxgb4/ev.c | 199 + drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 801 ++ drivers/infiniband/hw/cxgb4/mem.c | 805 ++ drivers/infiniband/hw/cxgb4/provider.c | 530 ++ drivers/infiniband/hw/cxgb4/qp.c | 1623 +++++ drivers/infiniband/hw/cxgb4/resource.c | 481 ++ drivers/infiniband/hw/cxgb4/t4.h | 577 ++ drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 839 +++ drivers/infiniband/hw/cxgb4/user.h | 73 + drivers/infiniband/hw/ehca/Kconfig | 9 + drivers/infiniband/hw/ehca/Makefile | 16 + drivers/infiniband/hw/ehca/ehca_av.c | 277 + drivers/infiniband/hw/ehca/ehca_classes.h | 482 ++ drivers/infiniband/hw/ehca/ehca_classes_pSeries.h | 208 + drivers/infiniband/hw/ehca/ehca_cq.c | 404 + drivers/infiniband/hw/ehca/ehca_eq.c | 189 + drivers/infiniband/hw/ehca/ehca_hca.c | 410 ++ drivers/infiniband/hw/ehca/ehca_irq.c | 942 +++ drivers/infiniband/hw/ehca/ehca_irq.h | 77 + drivers/infiniband/hw/ehca/ehca_iverbs.h | 212 + drivers/infiniband/hw/ehca/ehca_main.c | 1100 +++ drivers/infiniband/hw/ehca/ehca_mcast.c | 131 + drivers/infiniband/hw/ehca/ehca_mrmw.c | 2664 +++++++ drivers/infiniband/hw/ehca/ehca_mrmw.h | 132 + drivers/infiniband/hw/ehca/ehca_pd.c | 124 + drivers/infiniband/hw/ehca/ehca_qes.h | 260 + drivers/infiniband/hw/ehca/ehca_qp.c | 2261 ++++++ drivers/infiniband/hw/ehca/ehca_reqs.c | 953 +++ drivers/infiniband/hw/ehca/ehca_sqp.c | 237 + drivers/infiniband/hw/ehca/ehca_tools.h | 156 + drivers/infiniband/hw/ehca/ehca_uverbs.c | 309 + drivers/infiniband/hw/ehca/hcp_if.c | 969 +++ drivers/infiniband/hw/ehca/hcp_if.h | 265 + drivers/infiniband/hw/ehca/hcp_phyp.c | 82 + drivers/infiniband/hw/ehca/hcp_phyp.h | 90 + drivers/infiniband/hw/ehca/hipz_fns.h | 68 + drivers/infiniband/hw/ehca/hipz_fns_core.h | 100 + drivers/infiniband/hw/ehca/hipz_hw.h | 414 ++ drivers/infiniband/hw/ehca/ipz_pt_fn.c | 294 + drivers/infiniband/hw/ehca/ipz_pt_fn.h | 289 + drivers/infiniband/hw/ipath/Kconfig | 11 + drivers/infiniband/hw/ipath/Makefile | 37 + drivers/infiniband/hw/ipath/ipath_common.h | 851 +++ drivers/infiniband/hw/ipath/ipath_cq.c | 478 ++ drivers/infiniband/hw/ipath/ipath_debug.h | 99 + drivers/infiniband/hw/ipath/ipath_diag.c | 563 ++ drivers/infiniband/hw/ipath/ipath_dma.c | 192 + drivers/infiniband/hw/ipath/ipath_driver.c | 2789 +++++++ drivers/infiniband/hw/ipath/ipath_eeprom.c | 1183 +++ drivers/infiniband/hw/ipath/ipath_file_ops.c | 2616 +++++++ drivers/infiniband/hw/ipath/ipath_fs.c | 422 ++ drivers/infiniband/hw/ipath/ipath_iba6110.c | 1941 +++++ drivers/infiniband/hw/ipath/ipath_init_chip.c | 1076 +++ drivers/infiniband/hw/ipath/ipath_intr.c | 1274 ++++ drivers/infiniband/hw/ipath/ipath_kernel.h | 1378 ++++ drivers/infiniband/hw/ipath/ipath_keys.c | 270 + drivers/infiniband/hw/ipath/ipath_mad.c | 1507 ++++ drivers/infiniband/hw/ipath/ipath_mmap.c | 174 + drivers/infiniband/hw/ipath/ipath_mr.c | 430 ++ drivers/infiniband/hw/ipath/ipath_qp.c | 1080 +++ drivers/infiniband/hw/ipath/ipath_rc.c | 1969 +++++ drivers/infiniband/hw/ipath/ipath_registers.h | 512 ++ drivers/infiniband/hw/ipath/ipath_ruc.c | 734 ++ drivers/infiniband/hw/ipath/ipath_sdma.c | 818 +++ drivers/infiniband/hw/ipath/ipath_srq.c | 380 + drivers/infiniband/hw/ipath/ipath_stats.c | 347 + drivers/infiniband/hw/ipath/ipath_sysfs.c | 1238 ++++ drivers/infiniband/hw/ipath/ipath_uc.c | 547 ++ drivers/infiniband/hw/ipath/ipath_ud.c | 580 ++ drivers/infiniband/hw/ipath/ipath_user_pages.c | 229 + drivers/infiniband/hw/ipath/ipath_user_sdma.c | 880 +++ drivers/infiniband/hw/ipath/ipath_user_sdma.h | 52 + drivers/infiniband/hw/ipath/ipath_verbs.c | 2341 ++++++ drivers/infiniband/hw/ipath/ipath_verbs.h | 936 +++ drivers/infiniband/hw/ipath/ipath_verbs_mcast.c | 364 + drivers/infiniband/hw/ipath/ipath_wc_ppc64.c | 62 + drivers/infiniband/hw/ipath/ipath_wc_x86_64.c | 184 + drivers/infiniband/hw/mlx4/Kconfig | 10 + drivers/infiniband/hw/mlx4/Makefile | 3 + drivers/infiniband/hw/mlx4/ah.c | 199 + drivers/infiniband/hw/mlx4/cq.c | 825 +++ drivers/infiniband/hw/mlx4/doorbell.c | 98 + drivers/infiniband/hw/mlx4/mad.c | 429 ++ drivers/infiniband/hw/mlx4/main.c | 1370 ++++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 364 + drivers/infiniband/hw/mlx4/mr.c | 359 + drivers/infiniband/hw/mlx4/qp.c | 2175 ++++++ drivers/infiniband/hw/mlx4/srq.c | 368 + drivers/infiniband/hw/mlx4/user.h | 97 + drivers/infiniband/hw/mthca/Kconfig | 17 + drivers/infiniband/hw/mthca/Makefile | 7 + drivers/infiniband/hw/mthca/mthca_allocator.c | 301 + drivers/infiniband/hw/mthca/mthca_av.c | 374 + drivers/infiniband/hw/mthca/mthca_catas.c | 200 + drivers/infiniband/hw/mthca/mthca_cmd.c | 1969 +++++ drivers/infiniband/hw/mthca/mthca_cmd.h | 325 + drivers/infiniband/hw/mthca/mthca_config_reg.h | 48 + drivers/infiniband/hw/mthca/mthca_cq.c | 984 +++ drivers/infiniband/hw/mthca/mthca_dev.h | 596 ++ drivers/infiniband/hw/mthca/mthca_doorbell.h | 109 + drivers/infiniband/hw/mthca/mthca_eq.c | 905 +++ drivers/infiniband/hw/mthca/mthca_mad.c | 341 + drivers/infiniband/hw/mthca/mthca_main.c | 1280 ++++ drivers/infiniband/hw/mthca/mthca_mcg.c | 335 + drivers/infiniband/hw/mthca/mthca_memfree.c | 760 ++ drivers/infiniband/hw/mthca/mthca_memfree.h | 179 + drivers/infiniband/hw/mthca/mthca_mr.c | 965 +++ drivers/infiniband/hw/mthca/mthca_pd.c | 81 + drivers/infiniband/hw/mthca/mthca_profile.c | 285 + drivers/infiniband/hw/mthca/mthca_profile.h | 59 + drivers/infiniband/hw/mthca/mthca_provider.c | 1378 ++++ drivers/infiniband/hw/mthca/mthca_provider.h | 344 + drivers/infiniband/hw/mthca/mthca_qp.c | 2308 ++++++ drivers/infiniband/hw/mthca/mthca_reset.c | 288 + drivers/infiniband/hw/mthca/mthca_srq.c | 696 ++ drivers/infiniband/hw/mthca/mthca_uar.c | 78 + drivers/infiniband/hw/mthca/mthca_user.h | 112 + drivers/infiniband/hw/mthca/mthca_wqe.h | 131 + drivers/infiniband/hw/nes/Kconfig | 16 + drivers/infiniband/hw/nes/Makefile | 3 + drivers/infiniband/hw/nes/nes.c | 1246 ++++ drivers/infiniband/hw/nes/nes.h | 570 ++ drivers/infiniband/hw/nes/nes_cm.c | 3903 ++++++++++ drivers/infiniband/hw/nes/nes_cm.h | 467 ++ drivers/infiniband/hw/nes/nes_context.h | 193 + drivers/infiniband/hw/nes/nes_hw.c | 3944 ++++++++++ drivers/infiniband/hw/nes/nes_hw.h | 1392 ++++ drivers/infiniband/hw/nes/nes_mgt.c | 1162 +++ drivers/infiniband/hw/nes/nes_mgt.h | 97 + drivers/infiniband/hw/nes/nes_nic.c | 1887 +++++ drivers/infiniband/hw/nes/nes_user.h | 113 + drivers/infiniband/hw/nes/nes_utils.c | 972 +++ drivers/infiniband/hw/nes/nes_verbs.c | 4051 ++++++++++ drivers/infiniband/hw/nes/nes_verbs.h | 188 + drivers/infiniband/hw/qib/Kconfig | 7 + drivers/infiniband/hw/qib/Makefile | 15 + drivers/infiniband/hw/qib/qib.h | 1466 ++++ drivers/infiniband/hw/qib/qib_6120_regs.h | 977 +++ drivers/infiniband/hw/qib/qib_7220.h | 149 + drivers/infiniband/hw/qib/qib_7220_regs.h | 1496 ++++ drivers/infiniband/hw/qib/qib_7322_regs.h | 3163 ++++++++ drivers/infiniband/hw/qib/qib_common.h | 770 ++ drivers/infiniband/hw/qib/qib_cq.c | 485 ++ drivers/infiniband/hw/qib/qib_diag.c | 908 +++ drivers/infiniband/hw/qib/qib_dma.c | 182 + drivers/infiniband/hw/qib/qib_driver.c | 810 ++ drivers/infiniband/hw/qib/qib_eeprom.c | 451 ++ drivers/infiniband/hw/qib/qib_file_ops.c | 2328 ++++++ drivers/infiniband/hw/qib/qib_fs.c | 615 ++ drivers/infiniband/hw/qib/qib_iba6120.c | 3579 +++++++++ drivers/infiniband/hw/qib/qib_iba7220.c | 4642 ++++++++++++ drivers/infiniband/hw/qib/qib_iba7322.c | 8111 +++++++++++++++++++++ drivers/infiniband/hw/qib/qib_init.c | 1588 ++++ drivers/infiniband/hw/qib/qib_intr.c | 241 + drivers/infiniband/hw/qib/qib_keys.c | 360 + drivers/infiniband/hw/qib/qib_mad.c | 2178 ++++++ drivers/infiniband/hw/qib/qib_mad.h | 236 + drivers/infiniband/hw/qib/qib_mmap.c | 174 + drivers/infiniband/hw/qib/qib_mr.c | 505 ++ drivers/infiniband/hw/qib/qib_pcie.c | 759 ++ drivers/infiniband/hw/qib/qib_pio_copy.c | 64 + drivers/infiniband/hw/qib/qib_qp.c | 1281 ++++ drivers/infiniband/hw/qib/qib_qsfp.c | 556 ++ drivers/infiniband/hw/qib/qib_qsfp.h | 189 + drivers/infiniband/hw/qib/qib_rc.c | 2294 ++++++ drivers/infiniband/hw/qib/qib_ruc.c | 822 +++ drivers/infiniband/hw/qib/qib_sd7220.c | 1448 ++++ drivers/infiniband/hw/qib/qib_sdma.c | 976 +++ drivers/infiniband/hw/qib/qib_srq.c | 380 + drivers/infiniband/hw/qib/qib_sysfs.c | 723 ++ drivers/infiniband/hw/qib/qib_twsi.c | 498 ++ drivers/infiniband/hw/qib/qib_tx.c | 562 ++ drivers/infiniband/hw/qib/qib_uc.c | 553 ++ drivers/infiniband/hw/qib/qib_ud.c | 591 ++ drivers/infiniband/hw/qib/qib_user_pages.c | 157 + drivers/infiniband/hw/qib/qib_user_sdma.c | 898 +++ drivers/infiniband/hw/qib/qib_user_sdma.h | 52 + drivers/infiniband/hw/qib/qib_verbs.c | 2291 ++++++ drivers/infiniband/hw/qib/qib_verbs.h | 1097 +++ drivers/infiniband/hw/qib/qib_verbs_mcast.c | 368 + drivers/infiniband/hw/qib/qib_wc_ppc64.c | 62 + drivers/infiniband/hw/qib/qib_wc_x86_64.c | 171 + 230 files changed, 168468 insertions(+) create mode 100644 drivers/infiniband/hw/amso1100/Kbuild create mode 100644 drivers/infiniband/hw/amso1100/Kconfig create mode 100644 drivers/infiniband/hw/amso1100/c2.c create mode 100644 drivers/infiniband/hw/amso1100/c2.h create mode 100644 drivers/infiniband/hw/amso1100/c2_ae.c create mode 100644 drivers/infiniband/hw/amso1100/c2_ae.h create mode 100644 drivers/infiniband/hw/amso1100/c2_alloc.c create mode 100644 drivers/infiniband/hw/amso1100/c2_cm.c create mode 100644 drivers/infiniband/hw/amso1100/c2_cq.c create mode 100644 drivers/infiniband/hw/amso1100/c2_intr.c create mode 100644 drivers/infiniband/hw/amso1100/c2_mm.c create mode 100644 drivers/infiniband/hw/amso1100/c2_mq.c create mode 100644 drivers/infiniband/hw/amso1100/c2_mq.h create mode 100644 drivers/infiniband/hw/amso1100/c2_pd.c create mode 100644 drivers/infiniband/hw/amso1100/c2_provider.c create mode 100644 drivers/infiniband/hw/amso1100/c2_provider.h create mode 100644 drivers/infiniband/hw/amso1100/c2_qp.c create mode 100644 drivers/infiniband/hw/amso1100/c2_rnic.c create mode 100644 drivers/infiniband/hw/amso1100/c2_status.h create mode 100644 drivers/infiniband/hw/amso1100/c2_user.h create mode 100644 drivers/infiniband/hw/amso1100/c2_vq.c create mode 100644 drivers/infiniband/hw/amso1100/c2_vq.h create mode 100644 drivers/infiniband/hw/amso1100/c2_wr.h create mode 100644 drivers/infiniband/hw/cxgb3/Kconfig create mode 100644 drivers/infiniband/hw/cxgb3/Makefile create mode 100644 drivers/infiniband/hw/cxgb3/cxio_dbg.c create mode 100644 drivers/infiniband/hw/cxgb3/cxio_hal.c create mode 100644 drivers/infiniband/hw/cxgb3/cxio_hal.h create mode 100644 drivers/infiniband/hw/cxgb3/cxio_resource.c create mode 100644 drivers/infiniband/hw/cxgb3/cxio_resource.h create mode 100644 drivers/infiniband/hw/cxgb3/cxio_wr.h create mode 100644 drivers/infiniband/hw/cxgb3/iwch.c create mode 100644 drivers/infiniband/hw/cxgb3/iwch.h create mode 100644 drivers/infiniband/hw/cxgb3/iwch_cm.c create mode 100644 drivers/infiniband/hw/cxgb3/iwch_cm.h create mode 100644 drivers/infiniband/hw/cxgb3/iwch_cq.c create mode 100644 drivers/infiniband/hw/cxgb3/iwch_ev.c create mode 100644 drivers/infiniband/hw/cxgb3/iwch_mem.c create mode 100644 drivers/infiniband/hw/cxgb3/iwch_provider.c create mode 100644 drivers/infiniband/hw/cxgb3/iwch_provider.h create mode 100644 drivers/infiniband/hw/cxgb3/iwch_qp.c create mode 100644 drivers/infiniband/hw/cxgb3/iwch_user.h create mode 100644 drivers/infiniband/hw/cxgb3/tcb.h create mode 100644 drivers/infiniband/hw/cxgb4/Kconfig create mode 100644 drivers/infiniband/hw/cxgb4/Makefile create mode 100644 drivers/infiniband/hw/cxgb4/cm.c create mode 100644 drivers/infiniband/hw/cxgb4/cq.c create mode 100644 drivers/infiniband/hw/cxgb4/device.c create mode 100644 drivers/infiniband/hw/cxgb4/ev.c create mode 100644 drivers/infiniband/hw/cxgb4/iw_cxgb4.h create mode 100644 drivers/infiniband/hw/cxgb4/mem.c create mode 100644 drivers/infiniband/hw/cxgb4/provider.c create mode 100644 drivers/infiniband/hw/cxgb4/qp.c create mode 100644 drivers/infiniband/hw/cxgb4/resource.c create mode 100644 drivers/infiniband/hw/cxgb4/t4.h create mode 100644 drivers/infiniband/hw/cxgb4/t4fw_ri_api.h create mode 100644 drivers/infiniband/hw/cxgb4/user.h create mode 100644 drivers/infiniband/hw/ehca/Kconfig create mode 100644 drivers/infiniband/hw/ehca/Makefile create mode 100644 drivers/infiniband/hw/ehca/ehca_av.c create mode 100644 drivers/infiniband/hw/ehca/ehca_classes.h create mode 100644 drivers/infiniband/hw/ehca/ehca_classes_pSeries.h create mode 100644 drivers/infiniband/hw/ehca/ehca_cq.c create mode 100644 drivers/infiniband/hw/ehca/ehca_eq.c create mode 100644 drivers/infiniband/hw/ehca/ehca_hca.c create mode 100644 drivers/infiniband/hw/ehca/ehca_irq.c create mode 100644 drivers/infiniband/hw/ehca/ehca_irq.h create mode 100644 drivers/infiniband/hw/ehca/ehca_iverbs.h create mode 100644 drivers/infiniband/hw/ehca/ehca_main.c create mode 100644 drivers/infiniband/hw/ehca/ehca_mcast.c create mode 100644 drivers/infiniband/hw/ehca/ehca_mrmw.c create mode 100644 drivers/infiniband/hw/ehca/ehca_mrmw.h create mode 100644 drivers/infiniband/hw/ehca/ehca_pd.c create mode 100644 drivers/infiniband/hw/ehca/ehca_qes.h create mode 100644 drivers/infiniband/hw/ehca/ehca_qp.c create mode 100644 drivers/infiniband/hw/ehca/ehca_reqs.c create mode 100644 drivers/infiniband/hw/ehca/ehca_sqp.c create mode 100644 drivers/infiniband/hw/ehca/ehca_tools.h create mode 100644 drivers/infiniband/hw/ehca/ehca_uverbs.c create mode 100644 drivers/infiniband/hw/ehca/hcp_if.c create mode 100644 drivers/infiniband/hw/ehca/hcp_if.h create mode 100644 drivers/infiniband/hw/ehca/hcp_phyp.c create mode 100644 drivers/infiniband/hw/ehca/hcp_phyp.h create mode 100644 drivers/infiniband/hw/ehca/hipz_fns.h create mode 100644 drivers/infiniband/hw/ehca/hipz_fns_core.h create mode 100644 drivers/infiniband/hw/ehca/hipz_hw.h create mode 100644 drivers/infiniband/hw/ehca/ipz_pt_fn.c create mode 100644 drivers/infiniband/hw/ehca/ipz_pt_fn.h create mode 100644 drivers/infiniband/hw/ipath/Kconfig create mode 100644 drivers/infiniband/hw/ipath/Makefile create mode 100644 drivers/infiniband/hw/ipath/ipath_common.h create mode 100644 drivers/infiniband/hw/ipath/ipath_cq.c create mode 100644 drivers/infiniband/hw/ipath/ipath_debug.h create mode 100644 drivers/infiniband/hw/ipath/ipath_diag.c create mode 100644 drivers/infiniband/hw/ipath/ipath_dma.c create mode 100644 drivers/infiniband/hw/ipath/ipath_driver.c create mode 100644 drivers/infiniband/hw/ipath/ipath_eeprom.c create mode 100644 drivers/infiniband/hw/ipath/ipath_file_ops.c create mode 100644 drivers/infiniband/hw/ipath/ipath_fs.c create mode 100644 drivers/infiniband/hw/ipath/ipath_iba6110.c create mode 100644 drivers/infiniband/hw/ipath/ipath_init_chip.c create mode 100644 drivers/infiniband/hw/ipath/ipath_intr.c create mode 100644 drivers/infiniband/hw/ipath/ipath_kernel.h create mode 100644 drivers/infiniband/hw/ipath/ipath_keys.c create mode 100644 drivers/infiniband/hw/ipath/ipath_mad.c create mode 100644 drivers/infiniband/hw/ipath/ipath_mmap.c create mode 100644 drivers/infiniband/hw/ipath/ipath_mr.c create mode 100644 drivers/infiniband/hw/ipath/ipath_qp.c create mode 100644 drivers/infiniband/hw/ipath/ipath_rc.c create mode 100644 drivers/infiniband/hw/ipath/ipath_registers.h create mode 100644 drivers/infiniband/hw/ipath/ipath_ruc.c create mode 100644 drivers/infiniband/hw/ipath/ipath_sdma.c create mode 100644 drivers/infiniband/hw/ipath/ipath_srq.c create mode 100644 drivers/infiniband/hw/ipath/ipath_stats.c create mode 100644 drivers/infiniband/hw/ipath/ipath_sysfs.c create mode 100644 drivers/infiniband/hw/ipath/ipath_uc.c create mode 100644 drivers/infiniband/hw/ipath/ipath_ud.c create mode 100644 drivers/infiniband/hw/ipath/ipath_user_pages.c create mode 100644 drivers/infiniband/hw/ipath/ipath_user_sdma.c create mode 100644 drivers/infiniband/hw/ipath/ipath_user_sdma.h create mode 100644 drivers/infiniband/hw/ipath/ipath_verbs.c create mode 100644 drivers/infiniband/hw/ipath/ipath_verbs.h create mode 100644 drivers/infiniband/hw/ipath/ipath_verbs_mcast.c create mode 100644 drivers/infiniband/hw/ipath/ipath_wc_ppc64.c create mode 100644 drivers/infiniband/hw/ipath/ipath_wc_x86_64.c create mode 100644 drivers/infiniband/hw/mlx4/Kconfig create mode 100644 drivers/infiniband/hw/mlx4/Makefile create mode 100644 drivers/infiniband/hw/mlx4/ah.c create mode 100644 drivers/infiniband/hw/mlx4/cq.c create mode 100644 drivers/infiniband/hw/mlx4/doorbell.c create mode 100644 drivers/infiniband/hw/mlx4/mad.c create mode 100644 drivers/infiniband/hw/mlx4/main.c create mode 100644 drivers/infiniband/hw/mlx4/mlx4_ib.h create mode 100644 drivers/infiniband/hw/mlx4/mr.c create mode 100644 drivers/infiniband/hw/mlx4/qp.c create mode 100644 drivers/infiniband/hw/mlx4/srq.c create mode 100644 drivers/infiniband/hw/mlx4/user.h create mode 100644 drivers/infiniband/hw/mthca/Kconfig create mode 100644 drivers/infiniband/hw/mthca/Makefile create mode 100644 drivers/infiniband/hw/mthca/mthca_allocator.c create mode 100644 drivers/infiniband/hw/mthca/mthca_av.c create mode 100644 drivers/infiniband/hw/mthca/mthca_catas.c create mode 100644 drivers/infiniband/hw/mthca/mthca_cmd.c create mode 100644 drivers/infiniband/hw/mthca/mthca_cmd.h create mode 100644 drivers/infiniband/hw/mthca/mthca_config_reg.h create mode 100644 drivers/infiniband/hw/mthca/mthca_cq.c create mode 100644 drivers/infiniband/hw/mthca/mthca_dev.h create mode 100644 drivers/infiniband/hw/mthca/mthca_doorbell.h create mode 100644 drivers/infiniband/hw/mthca/mthca_eq.c create mode 100644 drivers/infiniband/hw/mthca/mthca_mad.c create mode 100644 drivers/infiniband/hw/mthca/mthca_main.c create mode 100644 drivers/infiniband/hw/mthca/mthca_mcg.c create mode 100644 drivers/infiniband/hw/mthca/mthca_memfree.c create mode 100644 drivers/infiniband/hw/mthca/mthca_memfree.h create mode 100644 drivers/infiniband/hw/mthca/mthca_mr.c create mode 100644 drivers/infiniband/hw/mthca/mthca_pd.c create mode 100644 drivers/infiniband/hw/mthca/mthca_profile.c create mode 100644 drivers/infiniband/hw/mthca/mthca_profile.h create mode 100644 drivers/infiniband/hw/mthca/mthca_provider.c create mode 100644 drivers/infiniband/hw/mthca/mthca_provider.h create mode 100644 drivers/infiniband/hw/mthca/mthca_qp.c create mode 100644 drivers/infiniband/hw/mthca/mthca_reset.c create mode 100644 drivers/infiniband/hw/mthca/mthca_srq.c create mode 100644 drivers/infiniband/hw/mthca/mthca_uar.c create mode 100644 drivers/infiniband/hw/mthca/mthca_user.h create mode 100644 drivers/infiniband/hw/mthca/mthca_wqe.h create mode 100644 drivers/infiniband/hw/nes/Kconfig create mode 100644 drivers/infiniband/hw/nes/Makefile create mode 100644 drivers/infiniband/hw/nes/nes.c create mode 100644 drivers/infiniband/hw/nes/nes.h create mode 100644 drivers/infiniband/hw/nes/nes_cm.c create mode 100644 drivers/infiniband/hw/nes/nes_cm.h create mode 100644 drivers/infiniband/hw/nes/nes_context.h create mode 100644 drivers/infiniband/hw/nes/nes_hw.c create mode 100644 drivers/infiniband/hw/nes/nes_hw.h create mode 100644 drivers/infiniband/hw/nes/nes_mgt.c create mode 100644 drivers/infiniband/hw/nes/nes_mgt.h create mode 100644 drivers/infiniband/hw/nes/nes_nic.c create mode 100644 drivers/infiniband/hw/nes/nes_user.h create mode 100644 drivers/infiniband/hw/nes/nes_utils.c create mode 100644 drivers/infiniband/hw/nes/nes_verbs.c create mode 100644 drivers/infiniband/hw/nes/nes_verbs.h create mode 100644 drivers/infiniband/hw/qib/Kconfig create mode 100644 drivers/infiniband/hw/qib/Makefile create mode 100644 drivers/infiniband/hw/qib/qib.h create mode 100644 drivers/infiniband/hw/qib/qib_6120_regs.h create mode 100644 drivers/infiniband/hw/qib/qib_7220.h create mode 100644 drivers/infiniband/hw/qib/qib_7220_regs.h create mode 100644 drivers/infiniband/hw/qib/qib_7322_regs.h create mode 100644 drivers/infiniband/hw/qib/qib_common.h create mode 100644 drivers/infiniband/hw/qib/qib_cq.c create mode 100644 drivers/infiniband/hw/qib/qib_diag.c create mode 100644 drivers/infiniband/hw/qib/qib_dma.c create mode 100644 drivers/infiniband/hw/qib/qib_driver.c create mode 100644 drivers/infiniband/hw/qib/qib_eeprom.c create mode 100644 drivers/infiniband/hw/qib/qib_file_ops.c create mode 100644 drivers/infiniband/hw/qib/qib_fs.c create mode 100644 drivers/infiniband/hw/qib/qib_iba6120.c create mode 100644 drivers/infiniband/hw/qib/qib_iba7220.c create mode 100644 drivers/infiniband/hw/qib/qib_iba7322.c create mode 100644 drivers/infiniband/hw/qib/qib_init.c create mode 100644 drivers/infiniband/hw/qib/qib_intr.c create mode 100644 drivers/infiniband/hw/qib/qib_keys.c create mode 100644 drivers/infiniband/hw/qib/qib_mad.c create mode 100644 drivers/infiniband/hw/qib/qib_mad.h create mode 100644 drivers/infiniband/hw/qib/qib_mmap.c create mode 100644 drivers/infiniband/hw/qib/qib_mr.c create mode 100644 drivers/infiniband/hw/qib/qib_pcie.c create mode 100644 drivers/infiniband/hw/qib/qib_pio_copy.c create mode 100644 drivers/infiniband/hw/qib/qib_qp.c create mode 100644 drivers/infiniband/hw/qib/qib_qsfp.c create mode 100644 drivers/infiniband/hw/qib/qib_qsfp.h create mode 100644 drivers/infiniband/hw/qib/qib_rc.c create mode 100644 drivers/infiniband/hw/qib/qib_ruc.c create mode 100644 drivers/infiniband/hw/qib/qib_sd7220.c create mode 100644 drivers/infiniband/hw/qib/qib_sdma.c create mode 100644 drivers/infiniband/hw/qib/qib_srq.c create mode 100644 drivers/infiniband/hw/qib/qib_sysfs.c create mode 100644 drivers/infiniband/hw/qib/qib_twsi.c create mode 100644 drivers/infiniband/hw/qib/qib_tx.c create mode 100644 drivers/infiniband/hw/qib/qib_uc.c create mode 100644 drivers/infiniband/hw/qib/qib_ud.c create mode 100644 drivers/infiniband/hw/qib/qib_user_pages.c create mode 100644 drivers/infiniband/hw/qib/qib_user_sdma.c create mode 100644 drivers/infiniband/hw/qib/qib_user_sdma.h create mode 100644 drivers/infiniband/hw/qib/qib_verbs.c create mode 100644 drivers/infiniband/hw/qib/qib_verbs.h create mode 100644 drivers/infiniband/hw/qib/qib_verbs_mcast.c create mode 100644 drivers/infiniband/hw/qib/qib_wc_ppc64.c create mode 100644 drivers/infiniband/hw/qib/qib_wc_x86_64.c (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/amso1100/Kbuild b/drivers/infiniband/hw/amso1100/Kbuild new file mode 100644 index 00000000..950dfabc --- /dev/null +++ b/drivers/infiniband/hw/amso1100/Kbuild @@ -0,0 +1,6 @@ +ccflags-$(CONFIG_INFINIBAND_AMSO1100_DEBUG) := -DDEBUG + +obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o + +iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \ + c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o diff --git a/drivers/infiniband/hw/amso1100/Kconfig b/drivers/infiniband/hw/amso1100/Kconfig new file mode 100644 index 00000000..e6ce5f20 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/Kconfig @@ -0,0 +1,15 @@ +config INFINIBAND_AMSO1100 + tristate "Ammasso 1100 HCA support" + depends on PCI && INET + ---help--- + This is a low-level driver for the Ammasso 1100 host + channel adapter (HCA). + +config INFINIBAND_AMSO1100_DEBUG + bool "Verbose debugging output" + depends on INFINIBAND_AMSO1100 + default n + ---help--- + This option causes the amso1100 driver to produce a bunch of + debug messages. Select this if you are developing the driver + or trying to diagnose a problem. diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c new file mode 100644 index 00000000..5ce7b9e8 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -0,0 +1,1253 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include "c2.h" +#include "c2_provider.h" + +MODULE_AUTHOR("Tom Tucker "); +MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); + +static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK + | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN; + +static int debug = -1; /* defaults above */ +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); + +static int c2_up(struct net_device *netdev); +static int c2_down(struct net_device *netdev); +static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev); +static void c2_tx_interrupt(struct net_device *netdev); +static void c2_rx_interrupt(struct net_device *netdev); +static irqreturn_t c2_interrupt(int irq, void *dev_id); +static void c2_tx_timeout(struct net_device *netdev); +static int c2_change_mtu(struct net_device *netdev, int new_mtu); +static void c2_reset(struct c2_port *c2_port); + +static struct pci_device_id c2_pci_table[] = { + { PCI_DEVICE(0x18b8, 0xb001) }, + { 0 } +}; + +MODULE_DEVICE_TABLE(pci, c2_pci_table); + +static void c2_print_macaddr(struct net_device *netdev) +{ + pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name, netdev->dev_addr, netdev->irq); +} + +static void c2_set_rxbufsize(struct c2_port *c2_port) +{ + struct net_device *netdev = c2_port->netdev; + + if (netdev->mtu > RX_BUF_SIZE) + c2_port->rx_buf_size = + netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) + + NET_IP_ALIGN; + else + c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE; +} + +/* + * Allocate TX ring elements and chain them together. + * One-to-one association of adapter descriptors with ring elements. + */ +static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr, + dma_addr_t base, void __iomem * mmio_txp_ring) +{ + struct c2_tx_desc *tx_desc; + struct c2_txp_desc __iomem *txp_desc; + struct c2_element *elem; + int i; + + tx_ring->start = kmalloc(sizeof(*elem) * tx_ring->count, GFP_KERNEL); + if (!tx_ring->start) + return -ENOMEM; + + elem = tx_ring->start; + tx_desc = vaddr; + txp_desc = mmio_txp_ring; + for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) { + tx_desc->len = 0; + tx_desc->status = 0; + + /* Set TXP_HTXD_UNINIT */ + __raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL), + (void __iomem *) txp_desc + C2_TXP_ADDR); + __raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN); + __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT), + (void __iomem *) txp_desc + C2_TXP_FLAGS); + + elem->skb = NULL; + elem->ht_desc = tx_desc; + elem->hw_desc = txp_desc; + + if (i == tx_ring->count - 1) { + elem->next = tx_ring->start; + tx_desc->next_offset = base; + } else { + elem->next = elem + 1; + tx_desc->next_offset = + base + (i + 1) * sizeof(*tx_desc); + } + } + + tx_ring->to_use = tx_ring->to_clean = tx_ring->start; + + return 0; +} + +/* + * Allocate RX ring elements and chain them together. + * One-to-one association of adapter descriptors with ring elements. + */ +static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr, + dma_addr_t base, void __iomem * mmio_rxp_ring) +{ + struct c2_rx_desc *rx_desc; + struct c2_rxp_desc __iomem *rxp_desc; + struct c2_element *elem; + int i; + + rx_ring->start = kmalloc(sizeof(*elem) * rx_ring->count, GFP_KERNEL); + if (!rx_ring->start) + return -ENOMEM; + + elem = rx_ring->start; + rx_desc = vaddr; + rxp_desc = mmio_rxp_ring; + for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) { + rx_desc->len = 0; + rx_desc->status = 0; + + /* Set RXP_HRXD_UNINIT */ + __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_OK), + (void __iomem *) rxp_desc + C2_RXP_STATUS); + __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT); + __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN); + __raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL), + (void __iomem *) rxp_desc + C2_RXP_ADDR); + __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT), + (void __iomem *) rxp_desc + C2_RXP_FLAGS); + + elem->skb = NULL; + elem->ht_desc = rx_desc; + elem->hw_desc = rxp_desc; + + if (i == rx_ring->count - 1) { + elem->next = rx_ring->start; + rx_desc->next_offset = base; + } else { + elem->next = elem + 1; + rx_desc->next_offset = + base + (i + 1) * sizeof(*rx_desc); + } + } + + rx_ring->to_use = rx_ring->to_clean = rx_ring->start; + + return 0; +} + +/* Setup buffer for receiving */ +static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem) +{ + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_rx_desc *rx_desc = elem->ht_desc; + struct sk_buff *skb; + dma_addr_t mapaddr; + u32 maplen; + struct c2_rxp_hdr *rxp_hdr; + + skb = dev_alloc_skb(c2_port->rx_buf_size); + if (unlikely(!skb)) { + pr_debug("%s: out of memory for receive\n", + c2_port->netdev->name); + return -ENOMEM; + } + + /* Zero out the rxp hdr in the sk_buff */ + memset(skb->data, 0, sizeof(*rxp_hdr)); + + skb->dev = c2_port->netdev; + + maplen = c2_port->rx_buf_size; + mapaddr = + pci_map_single(c2dev->pcidev, skb->data, maplen, + PCI_DMA_FROMDEVICE); + + /* Set the sk_buff RXP_header to RXP_HRXD_READY */ + rxp_hdr = (struct c2_rxp_hdr *) skb->data; + rxp_hdr->flags = RXP_HRXD_READY; + + __raw_writew(0, elem->hw_desc + C2_RXP_STATUS); + __raw_writew((__force u16) cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)), + elem->hw_desc + C2_RXP_LEN); + __raw_writeq((__force u64) cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR); + __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY), + elem->hw_desc + C2_RXP_FLAGS); + + elem->skb = skb; + elem->mapaddr = mapaddr; + elem->maplen = maplen; + rx_desc->len = maplen; + + return 0; +} + +/* + * Allocate buffers for the Rx ring + * For receive: rx_ring.to_clean is next received frame + */ +static int c2_rx_fill(struct c2_port *c2_port) +{ + struct c2_ring *rx_ring = &c2_port->rx_ring; + struct c2_element *elem; + int ret = 0; + + elem = rx_ring->start; + do { + if (c2_rx_alloc(c2_port, elem)) { + ret = 1; + break; + } + } while ((elem = elem->next) != rx_ring->start); + + rx_ring->to_clean = rx_ring->start; + return ret; +} + +/* Free all buffers in RX ring, assumes receiver stopped */ +static void c2_rx_clean(struct c2_port *c2_port) +{ + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_ring *rx_ring = &c2_port->rx_ring; + struct c2_element *elem; + struct c2_rx_desc *rx_desc; + + elem = rx_ring->start; + do { + rx_desc = elem->ht_desc; + rx_desc->len = 0; + + __raw_writew(0, elem->hw_desc + C2_RXP_STATUS); + __raw_writew(0, elem->hw_desc + C2_RXP_COUNT); + __raw_writew(0, elem->hw_desc + C2_RXP_LEN); + __raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL), + elem->hw_desc + C2_RXP_ADDR); + __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT), + elem->hw_desc + C2_RXP_FLAGS); + + if (elem->skb) { + pci_unmap_single(c2dev->pcidev, elem->mapaddr, + elem->maplen, PCI_DMA_FROMDEVICE); + dev_kfree_skb(elem->skb); + elem->skb = NULL; + } + } while ((elem = elem->next) != rx_ring->start); +} + +static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem) +{ + struct c2_tx_desc *tx_desc = elem->ht_desc; + + tx_desc->len = 0; + + pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen, + PCI_DMA_TODEVICE); + + if (elem->skb) { + dev_kfree_skb_any(elem->skb); + elem->skb = NULL; + } + + return 0; +} + +/* Free all buffers in TX ring, assumes transmitter stopped */ +static void c2_tx_clean(struct c2_port *c2_port) +{ + struct c2_ring *tx_ring = &c2_port->tx_ring; + struct c2_element *elem; + struct c2_txp_desc txp_htxd; + int retry; + unsigned long flags; + + spin_lock_irqsave(&c2_port->tx_lock, flags); + + elem = tx_ring->start; + + do { + retry = 0; + do { + txp_htxd.flags = + readw(elem->hw_desc + C2_TXP_FLAGS); + + if (txp_htxd.flags == TXP_HTXD_READY) { + retry = 1; + __raw_writew(0, + elem->hw_desc + C2_TXP_LEN); + __raw_writeq(0, + elem->hw_desc + C2_TXP_ADDR); + __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_DONE), + elem->hw_desc + C2_TXP_FLAGS); + c2_port->netdev->stats.tx_dropped++; + break; + } else { + __raw_writew(0, + elem->hw_desc + C2_TXP_LEN); + __raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL), + elem->hw_desc + C2_TXP_ADDR); + __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT), + elem->hw_desc + C2_TXP_FLAGS); + } + + c2_tx_free(c2_port->c2dev, elem); + + } while ((elem = elem->next) != tx_ring->start); + } while (retry); + + c2_port->tx_avail = c2_port->tx_ring.count - 1; + c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start; + + if (c2_port->tx_avail > MAX_SKB_FRAGS + 1) + netif_wake_queue(c2_port->netdev); + + spin_unlock_irqrestore(&c2_port->tx_lock, flags); +} + +/* + * Process transmit descriptors marked 'DONE' by the firmware, + * freeing up their unneeded sk_buffs. + */ +static void c2_tx_interrupt(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_ring *tx_ring = &c2_port->tx_ring; + struct c2_element *elem; + struct c2_txp_desc txp_htxd; + + spin_lock(&c2_port->tx_lock); + + for (elem = tx_ring->to_clean; elem != tx_ring->to_use; + elem = elem->next) { + txp_htxd.flags = + be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_FLAGS)); + + if (txp_htxd.flags != TXP_HTXD_DONE) + break; + + if (netif_msg_tx_done(c2_port)) { + /* PCI reads are expensive in fast path */ + txp_htxd.len = + be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_LEN)); + pr_debug("%s: tx done slot %3Zu status 0x%x len " + "%5u bytes\n", + netdev->name, elem - tx_ring->start, + txp_htxd.flags, txp_htxd.len); + } + + c2_tx_free(c2dev, elem); + ++(c2_port->tx_avail); + } + + tx_ring->to_clean = elem; + + if (netif_queue_stopped(netdev) + && c2_port->tx_avail > MAX_SKB_FRAGS + 1) + netif_wake_queue(netdev); + + spin_unlock(&c2_port->tx_lock); +} + +static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem) +{ + struct c2_rx_desc *rx_desc = elem->ht_desc; + struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data; + + if (rxp_hdr->status != RXP_HRXD_OK || + rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) { + pr_debug("BAD RXP_HRXD\n"); + pr_debug(" rx_desc : %p\n", rx_desc); + pr_debug(" index : %Zu\n", + elem - c2_port->rx_ring.start); + pr_debug(" len : %u\n", rx_desc->len); + pr_debug(" rxp_hdr : %p [PA %p]\n", rxp_hdr, + (void *) __pa((unsigned long) rxp_hdr)); + pr_debug(" flags : 0x%x\n", rxp_hdr->flags); + pr_debug(" status: 0x%x\n", rxp_hdr->status); + pr_debug(" len : %u\n", rxp_hdr->len); + pr_debug(" rsvd : 0x%x\n", rxp_hdr->rsvd); + } + + /* Setup the skb for reuse since we're dropping this pkt */ + elem->skb->data = elem->skb->head; + skb_reset_tail_pointer(elem->skb); + + /* Zero out the rxp hdr in the sk_buff */ + memset(elem->skb->data, 0, sizeof(*rxp_hdr)); + + /* Write the descriptor to the adapter's rx ring */ + __raw_writew(0, elem->hw_desc + C2_RXP_STATUS); + __raw_writew(0, elem->hw_desc + C2_RXP_COUNT); + __raw_writew((__force u16) cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)), + elem->hw_desc + C2_RXP_LEN); + __raw_writeq((__force u64) cpu_to_be64(elem->mapaddr), + elem->hw_desc + C2_RXP_ADDR); + __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY), + elem->hw_desc + C2_RXP_FLAGS); + + pr_debug("packet dropped\n"); + c2_port->netdev->stats.rx_dropped++; +} + +static void c2_rx_interrupt(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_ring *rx_ring = &c2_port->rx_ring; + struct c2_element *elem; + struct c2_rx_desc *rx_desc; + struct c2_rxp_hdr *rxp_hdr; + struct sk_buff *skb; + dma_addr_t mapaddr; + u32 maplen, buflen; + unsigned long flags; + + spin_lock_irqsave(&c2dev->lock, flags); + + /* Begin where we left off */ + rx_ring->to_clean = rx_ring->start + c2dev->cur_rx; + + for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean; + elem = elem->next) { + rx_desc = elem->ht_desc; + mapaddr = elem->mapaddr; + maplen = elem->maplen; + skb = elem->skb; + rxp_hdr = (struct c2_rxp_hdr *) skb->data; + + if (rxp_hdr->flags != RXP_HRXD_DONE) + break; + buflen = rxp_hdr->len; + + /* Sanity check the RXP header */ + if (rxp_hdr->status != RXP_HRXD_OK || + buflen > (rx_desc->len - sizeof(*rxp_hdr))) { + c2_rx_error(c2_port, elem); + continue; + } + + /* + * Allocate and map a new skb for replenishing the host + * RX desc + */ + if (c2_rx_alloc(c2_port, elem)) { + c2_rx_error(c2_port, elem); + continue; + } + + /* Unmap the old skb */ + pci_unmap_single(c2dev->pcidev, mapaddr, maplen, + PCI_DMA_FROMDEVICE); + + prefetch(skb->data); + + /* + * Skip past the leading 8 bytes comprising of the + * "struct c2_rxp_hdr", prepended by the adapter + * to the usual Ethernet header ("struct ethhdr"), + * to the start of the raw Ethernet packet. + * + * Fix up the various fields in the sk_buff before + * passing it up to netif_rx(). The transfer size + * (in bytes) specified by the adapter len field of + * the "struct rxp_hdr_t" does NOT include the + * "sizeof(struct c2_rxp_hdr)". + */ + skb->data += sizeof(*rxp_hdr); + skb_set_tail_pointer(skb, buflen); + skb->len = buflen; + skb->protocol = eth_type_trans(skb, netdev); + + netif_rx(skb); + + netdev->stats.rx_packets++; + netdev->stats.rx_bytes += buflen; + } + + /* Save where we left off */ + rx_ring->to_clean = elem; + c2dev->cur_rx = elem - rx_ring->start; + C2_SET_CUR_RX(c2dev, c2dev->cur_rx); + + spin_unlock_irqrestore(&c2dev->lock, flags); +} + +/* + * Handle netisr0 TX & RX interrupts. + */ +static irqreturn_t c2_interrupt(int irq, void *dev_id) +{ + unsigned int netisr0, dmaisr; + int handled = 0; + struct c2_dev *c2dev = (struct c2_dev *) dev_id; + + /* Process CCILNET interrupts */ + netisr0 = readl(c2dev->regs + C2_NISR0); + if (netisr0) { + + /* + * There is an issue with the firmware that always + * provides the status of RX for both TX & RX + * interrupts. So process both queues here. + */ + c2_rx_interrupt(c2dev->netdev); + c2_tx_interrupt(c2dev->netdev); + + /* Clear the interrupt */ + writel(netisr0, c2dev->regs + C2_NISR0); + handled++; + } + + /* Process RNIC interrupts */ + dmaisr = readl(c2dev->regs + C2_DISR); + if (dmaisr) { + writel(dmaisr, c2dev->regs + C2_DISR); + c2_rnic_interrupt(c2dev); + handled++; + } + + if (handled) { + return IRQ_HANDLED; + } else { + return IRQ_NONE; + } +} + +static int c2_up(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_element *elem; + struct c2_rxp_hdr *rxp_hdr; + struct in_device *in_dev; + size_t rx_size, tx_size; + int ret, i; + unsigned int netimr0; + + if (netif_msg_ifup(c2_port)) + pr_debug("%s: enabling interface\n", netdev->name); + + /* Set the Rx buffer size based on MTU */ + c2_set_rxbufsize(c2_port); + + /* Allocate DMA'able memory for Tx/Rx host descriptor rings */ + rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc); + tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc); + + c2_port->mem_size = tx_size + rx_size; + c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size, + &c2_port->dma); + if (c2_port->mem == NULL) { + pr_debug("Unable to allocate memory for " + "host descriptor rings\n"); + return -ENOMEM; + } + + memset(c2_port->mem, 0, c2_port->mem_size); + + /* Create the Rx host descriptor ring */ + if ((ret = + c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma, + c2dev->mmio_rxp_ring))) { + pr_debug("Unable to create RX ring\n"); + goto bail0; + } + + /* Allocate Rx buffers for the host descriptor ring */ + if (c2_rx_fill(c2_port)) { + pr_debug("Unable to fill RX ring\n"); + goto bail1; + } + + /* Create the Tx host descriptor ring */ + if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size, + c2_port->dma + rx_size, + c2dev->mmio_txp_ring))) { + pr_debug("Unable to create TX ring\n"); + goto bail1; + } + + /* Set the TX pointer to where we left off */ + c2_port->tx_avail = c2_port->tx_ring.count - 1; + c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean = + c2_port->tx_ring.start + c2dev->cur_tx; + + /* missing: Initialize MAC */ + + BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean); + + /* Reset the adapter, ensures the driver is in sync with the RXP */ + c2_reset(c2_port); + + /* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */ + for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count; + i++, elem++) { + rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data; + rxp_hdr->flags = 0; + __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY), + elem->hw_desc + C2_RXP_FLAGS); + } + + /* Enable network packets */ + netif_start_queue(netdev); + + /* Enable IRQ */ + writel(0, c2dev->regs + C2_IDIS); + netimr0 = readl(c2dev->regs + C2_NIMR0); + netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT); + writel(netimr0, c2dev->regs + C2_NIMR0); + + /* Tell the stack to ignore arp requests for ipaddrs bound to + * other interfaces. This is needed to prevent the host stack + * from responding to arp requests to the ipaddr bound on the + * rdma interface. + */ + in_dev = in_dev_get(netdev); + IN_DEV_CONF_SET(in_dev, ARP_IGNORE, 1); + in_dev_put(in_dev); + + return 0; + + bail1: + c2_rx_clean(c2_port); + kfree(c2_port->rx_ring.start); + + bail0: + pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem, + c2_port->dma); + + return ret; +} + +static int c2_down(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + struct c2_dev *c2dev = c2_port->c2dev; + + if (netif_msg_ifdown(c2_port)) + pr_debug("%s: disabling interface\n", + netdev->name); + + /* Wait for all the queued packets to get sent */ + c2_tx_interrupt(netdev); + + /* Disable network packets */ + netif_stop_queue(netdev); + + /* Disable IRQs by clearing the interrupt mask */ + writel(1, c2dev->regs + C2_IDIS); + writel(0, c2dev->regs + C2_NIMR0); + + /* missing: Stop transmitter */ + + /* missing: Stop receiver */ + + /* Reset the adapter, ensures the driver is in sync with the RXP */ + c2_reset(c2_port); + + /* missing: Turn off LEDs here */ + + /* Free all buffers in the host descriptor rings */ + c2_tx_clean(c2_port); + c2_rx_clean(c2_port); + + /* Free the host descriptor rings */ + kfree(c2_port->rx_ring.start); + kfree(c2_port->tx_ring.start); + pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem, + c2_port->dma); + + return 0; +} + +static void c2_reset(struct c2_port *c2_port) +{ + struct c2_dev *c2dev = c2_port->c2dev; + unsigned int cur_rx = c2dev->cur_rx; + + /* Tell the hardware to quiesce */ + C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI); + + /* + * The hardware will reset the C2_PCI_HRX_QUI bit once + * the RXP is quiesced. Wait 2 seconds for this. + */ + ssleep(2); + + cur_rx = C2_GET_CUR_RX(c2dev); + + if (cur_rx & C2_PCI_HRX_QUI) + pr_debug("c2_reset: failed to quiesce the hardware!\n"); + + cur_rx &= ~C2_PCI_HRX_QUI; + + c2dev->cur_rx = cur_rx; + + pr_debug("Current RX: %u\n", c2dev->cur_rx); +} + +static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_ring *tx_ring = &c2_port->tx_ring; + struct c2_element *elem; + dma_addr_t mapaddr; + u32 maplen; + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&c2_port->tx_lock, flags); + + if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) { + netif_stop_queue(netdev); + spin_unlock_irqrestore(&c2_port->tx_lock, flags); + + pr_debug("%s: Tx ring full when queue awake!\n", + netdev->name); + return NETDEV_TX_BUSY; + } + + maplen = skb_headlen(skb); + mapaddr = + pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE); + + elem = tx_ring->to_use; + elem->skb = skb; + elem->mapaddr = mapaddr; + elem->maplen = maplen; + + /* Tell HW to xmit */ + __raw_writeq((__force u64) cpu_to_be64(mapaddr), + elem->hw_desc + C2_TXP_ADDR); + __raw_writew((__force u16) cpu_to_be16(maplen), + elem->hw_desc + C2_TXP_LEN); + __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY), + elem->hw_desc + C2_TXP_FLAGS); + + netdev->stats.tx_packets++; + netdev->stats.tx_bytes += maplen; + + /* Loop thru additional data fragments and queue them */ + if (skb_shinfo(skb)->nr_frags) { + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + maplen = skb_frag_size(frag); + mapaddr = skb_frag_dma_map(&c2dev->pcidev->dev, frag, + 0, maplen, DMA_TO_DEVICE); + elem = elem->next; + elem->skb = NULL; + elem->mapaddr = mapaddr; + elem->maplen = maplen; + + /* Tell HW to xmit */ + __raw_writeq((__force u64) cpu_to_be64(mapaddr), + elem->hw_desc + C2_TXP_ADDR); + __raw_writew((__force u16) cpu_to_be16(maplen), + elem->hw_desc + C2_TXP_LEN); + __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY), + elem->hw_desc + C2_TXP_FLAGS); + + netdev->stats.tx_packets++; + netdev->stats.tx_bytes += maplen; + } + } + + tx_ring->to_use = elem->next; + c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1); + + if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) { + netif_stop_queue(netdev); + if (netif_msg_tx_queued(c2_port)) + pr_debug("%s: transmit queue full\n", + netdev->name); + } + + spin_unlock_irqrestore(&c2_port->tx_lock, flags); + + netdev->trans_start = jiffies; + + return NETDEV_TX_OK; +} + +static void c2_tx_timeout(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + + if (netif_msg_timer(c2_port)) + pr_debug("%s: tx timeout\n", netdev->name); + + c2_tx_clean(c2_port); +} + +static int c2_change_mtu(struct net_device *netdev, int new_mtu) +{ + int ret = 0; + + if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU) + return -EINVAL; + + netdev->mtu = new_mtu; + + if (netif_running(netdev)) { + c2_down(netdev); + + c2_up(netdev); + } + + return ret; +} + +static const struct net_device_ops c2_netdev = { + .ndo_open = c2_up, + .ndo_stop = c2_down, + .ndo_start_xmit = c2_xmit_frame, + .ndo_tx_timeout = c2_tx_timeout, + .ndo_change_mtu = c2_change_mtu, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +}; + +/* Initialize network device */ +static struct net_device *c2_devinit(struct c2_dev *c2dev, + void __iomem * mmio_addr) +{ + struct c2_port *c2_port = NULL; + struct net_device *netdev = alloc_etherdev(sizeof(*c2_port)); + + if (!netdev) { + pr_debug("c2_port etherdev alloc failed"); + return NULL; + } + + SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev); + + netdev->netdev_ops = &c2_netdev; + netdev->watchdog_timeo = C2_TX_TIMEOUT; + netdev->irq = c2dev->pcidev->irq; + + c2_port = netdev_priv(netdev); + c2_port->netdev = netdev; + c2_port->c2dev = c2dev; + c2_port->msg_enable = netif_msg_init(debug, default_msg); + c2_port->tx_ring.count = C2_NUM_TX_DESC; + c2_port->rx_ring.count = C2_NUM_RX_DESC; + + spin_lock_init(&c2_port->tx_lock); + + /* Copy our 48-bit ethernet hardware address */ + memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6); + + /* Validate the MAC address */ + if (!is_valid_ether_addr(netdev->dev_addr)) { + pr_debug("Invalid MAC Address\n"); + c2_print_macaddr(netdev); + free_netdev(netdev); + return NULL; + } + + c2dev->netdev = netdev; + + return netdev; +} + +static int __devinit c2_probe(struct pci_dev *pcidev, + const struct pci_device_id *ent) +{ + int ret = 0, i; + unsigned long reg0_start, reg0_flags, reg0_len; + unsigned long reg2_start, reg2_flags, reg2_len; + unsigned long reg4_start, reg4_flags, reg4_len; + unsigned kva_map_size; + struct net_device *netdev = NULL; + struct c2_dev *c2dev = NULL; + void __iomem *mmio_regs = NULL; + + printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n", + DRV_VERSION); + + /* Enable PCI device */ + ret = pci_enable_device(pcidev); + if (ret) { + printk(KERN_ERR PFX "%s: Unable to enable PCI device\n", + pci_name(pcidev)); + goto bail0; + } + + reg0_start = pci_resource_start(pcidev, BAR_0); + reg0_len = pci_resource_len(pcidev, BAR_0); + reg0_flags = pci_resource_flags(pcidev, BAR_0); + + reg2_start = pci_resource_start(pcidev, BAR_2); + reg2_len = pci_resource_len(pcidev, BAR_2); + reg2_flags = pci_resource_flags(pcidev, BAR_2); + + reg4_start = pci_resource_start(pcidev, BAR_4); + reg4_len = pci_resource_len(pcidev, BAR_4); + reg4_flags = pci_resource_flags(pcidev, BAR_4); + + pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len); + pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len); + pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len); + + /* Make sure PCI base addr are MMIO */ + if (!(reg0_flags & IORESOURCE_MEM) || + !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) { + printk(KERN_ERR PFX "PCI regions not an MMIO resource\n"); + ret = -ENODEV; + goto bail1; + } + + /* Check for weird/broken PCI region reporting */ + if ((reg0_len < C2_REG0_SIZE) || + (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) { + printk(KERN_ERR PFX "Invalid PCI region sizes\n"); + ret = -ENODEV; + goto bail1; + } + + /* Reserve PCI I/O and memory resources */ + ret = pci_request_regions(pcidev, DRV_NAME); + if (ret) { + printk(KERN_ERR PFX "%s: Unable to request regions\n", + pci_name(pcidev)); + goto bail1; + } + + if ((sizeof(dma_addr_t) > 4)) { + ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(64)); + if (ret < 0) { + printk(KERN_ERR PFX "64b DMA configuration failed\n"); + goto bail2; + } + } else { + ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32)); + if (ret < 0) { + printk(KERN_ERR PFX "32b DMA configuration failed\n"); + goto bail2; + } + } + + /* Enables bus-mastering on the device */ + pci_set_master(pcidev); + + /* Remap the adapter PCI registers in BAR4 */ + mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET, + sizeof(struct c2_adapter_pci_regs)); + if (!mmio_regs) { + printk(KERN_ERR PFX + "Unable to remap adapter PCI registers in BAR4\n"); + ret = -EIO; + goto bail2; + } + + /* Validate PCI regs magic */ + for (i = 0; i < sizeof(c2_magic); i++) { + if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) { + printk(KERN_ERR PFX "Downlevel Firmware boot loader " + "[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash " + "utility to update your boot loader\n", + i + 1, sizeof(c2_magic), + readb(mmio_regs + C2_REGS_MAGIC + i), + c2_magic[i]); + printk(KERN_ERR PFX "Adapter not claimed\n"); + iounmap(mmio_regs); + ret = -EIO; + goto bail2; + } + } + + /* Validate the adapter version */ + if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) { + printk(KERN_ERR PFX "Version mismatch " + "[fw=%u, c2=%u], Adapter not claimed\n", + be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)), + C2_VERSION); + ret = -EINVAL; + iounmap(mmio_regs); + goto bail2; + } + + /* Validate the adapter IVN */ + if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) { + printk(KERN_ERR PFX "Downlevel FIrmware level. You should be using " + "the OpenIB device support kit. " + "[fw=0x%x, c2=0x%x], Adapter not claimed\n", + be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)), + C2_IVN); + ret = -EINVAL; + iounmap(mmio_regs); + goto bail2; + } + + /* Allocate hardware structure */ + c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev)); + if (!c2dev) { + printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n", + pci_name(pcidev)); + ret = -ENOMEM; + iounmap(mmio_regs); + goto bail2; + } + + memset(c2dev, 0, sizeof(*c2dev)); + spin_lock_init(&c2dev->lock); + c2dev->pcidev = pcidev; + c2dev->cur_tx = 0; + + /* Get the last RX index */ + c2dev->cur_rx = + (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_HRX_CUR)) - + 0xffffc000) / sizeof(struct c2_rxp_desc); + + /* Request an interrupt line for the driver */ + ret = request_irq(pcidev->irq, c2_interrupt, IRQF_SHARED, DRV_NAME, c2dev); + if (ret) { + printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n", + pci_name(pcidev), pcidev->irq); + iounmap(mmio_regs); + goto bail3; + } + + /* Set driver specific data */ + pci_set_drvdata(pcidev, c2dev); + + /* Initialize network device */ + if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) { + iounmap(mmio_regs); + goto bail4; + } + + /* Save off the actual size prior to unmapping mmio_regs */ + kva_map_size = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_PCI_WINSIZE)); + + /* Unmap the adapter PCI registers in BAR4 */ + iounmap(mmio_regs); + + /* Register network device */ + ret = register_netdev(netdev); + if (ret) { + printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", + ret); + goto bail5; + } + + /* Disable network packets */ + netif_stop_queue(netdev); + + /* Remap the adapter HRXDQ PA space to kernel VA space */ + c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET, + C2_RXP_HRXDQ_SIZE); + if (!c2dev->mmio_rxp_ring) { + printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n"); + ret = -EIO; + goto bail6; + } + + /* Remap the adapter HTXDQ PA space to kernel VA space */ + c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET, + C2_TXP_HTXDQ_SIZE); + if (!c2dev->mmio_txp_ring) { + printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n"); + ret = -EIO; + goto bail7; + } + + /* Save off the current RX index in the last 4 bytes of the TXP Ring */ + C2_SET_CUR_RX(c2dev, c2dev->cur_rx); + + /* Remap the PCI registers in adapter BAR0 to kernel VA space */ + c2dev->regs = ioremap_nocache(reg0_start, reg0_len); + if (!c2dev->regs) { + printk(KERN_ERR PFX "Unable to remap BAR0\n"); + ret = -EIO; + goto bail8; + } + + /* Remap the PCI registers in adapter BAR4 to kernel VA space */ + c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET; + c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET, + kva_map_size); + if (!c2dev->kva) { + printk(KERN_ERR PFX "Unable to remap BAR4\n"); + ret = -EIO; + goto bail9; + } + + /* Print out the MAC address */ + c2_print_macaddr(netdev); + + ret = c2_rnic_init(c2dev); + if (ret) { + printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret); + goto bail10; + } + + if (c2_register_device(c2dev)) + goto bail10; + + return 0; + + bail10: + iounmap(c2dev->kva); + + bail9: + iounmap(c2dev->regs); + + bail8: + iounmap(c2dev->mmio_txp_ring); + + bail7: + iounmap(c2dev->mmio_rxp_ring); + + bail6: + unregister_netdev(netdev); + + bail5: + free_netdev(netdev); + + bail4: + free_irq(pcidev->irq, c2dev); + + bail3: + ib_dealloc_device(&c2dev->ibdev); + + bail2: + pci_release_regions(pcidev); + + bail1: + pci_disable_device(pcidev); + + bail0: + return ret; +} + +static void __devexit c2_remove(struct pci_dev *pcidev) +{ + struct c2_dev *c2dev = pci_get_drvdata(pcidev); + struct net_device *netdev = c2dev->netdev; + + /* Unregister with OpenIB */ + c2_unregister_device(c2dev); + + /* Clean up the RNIC resources */ + c2_rnic_term(c2dev); + + /* Remove network device from the kernel */ + unregister_netdev(netdev); + + /* Free network device */ + free_netdev(netdev); + + /* Free the interrupt line */ + free_irq(pcidev->irq, c2dev); + + /* missing: Turn LEDs off here */ + + /* Unmap adapter PA space */ + iounmap(c2dev->kva); + iounmap(c2dev->regs); + iounmap(c2dev->mmio_txp_ring); + iounmap(c2dev->mmio_rxp_ring); + + /* Free the hardware structure */ + ib_dealloc_device(&c2dev->ibdev); + + /* Release reserved PCI I/O and memory resources */ + pci_release_regions(pcidev); + + /* Disable PCI device */ + pci_disable_device(pcidev); + + /* Clear driver specific data */ + pci_set_drvdata(pcidev, NULL); +} + +static struct pci_driver c2_pci_driver = { + .name = DRV_NAME, + .id_table = c2_pci_table, + .probe = c2_probe, + .remove = __devexit_p(c2_remove), +}; + +static int __init c2_init_module(void) +{ + return pci_register_driver(&c2_pci_driver); +} + +static void __exit c2_exit_module(void) +{ + pci_unregister_driver(&c2_pci_driver); +} + +module_init(c2_init_module); +module_exit(c2_exit_module); diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h new file mode 100644 index 00000000..6ae698e6 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2.h @@ -0,0 +1,548 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __C2_H +#define __C2_H + +#include +#include +#include +#include +#include +#include + +#include "c2_provider.h" +#include "c2_mq.h" +#include "c2_status.h" + +#define DRV_NAME "c2" +#define DRV_VERSION "1.1" +#define PFX DRV_NAME ": " + +#define BAR_0 0 +#define BAR_2 2 +#define BAR_4 4 + +#define RX_BUF_SIZE (1536 + 8) +#define ETH_JUMBO_MTU 9000 +#define C2_MAGIC "CEPHEUS" +#define C2_VERSION 4 +#define C2_IVN (18 & 0x7fffffff) + +#define C2_REG0_SIZE (16 * 1024) +#define C2_REG2_SIZE (2 * 1024 * 1024) +#define C2_REG4_SIZE (256 * 1024 * 1024) +#define C2_NUM_TX_DESC 341 +#define C2_NUM_RX_DESC 256 +#define C2_PCI_REGS_OFFSET (0x10000) +#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2)) +#define C2_RXP_HRXDQ_SIZE (4096) +#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE) +#define C2_TXP_HTXDQ_SIZE (4096) +#define C2_TX_TIMEOUT (6*HZ) + +/* CEPHEUS */ +static const u8 c2_magic[] = { + 0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53 +}; + +enum adapter_pci_regs { + C2_REGS_MAGIC = 0x0000, + C2_REGS_VERS = 0x0008, + C2_REGS_IVN = 0x000C, + C2_REGS_PCI_WINSIZE = 0x0010, + C2_REGS_Q0_QSIZE = 0x0014, + C2_REGS_Q0_MSGSIZE = 0x0018, + C2_REGS_Q0_POOLSTART = 0x001C, + C2_REGS_Q0_SHARED = 0x0020, + C2_REGS_Q1_QSIZE = 0x0024, + C2_REGS_Q1_MSGSIZE = 0x0028, + C2_REGS_Q1_SHARED = 0x0030, + C2_REGS_Q2_QSIZE = 0x0034, + C2_REGS_Q2_MSGSIZE = 0x0038, + C2_REGS_Q2_SHARED = 0x0040, + C2_REGS_ENADDR = 0x004C, + C2_REGS_RDMA_ENADDR = 0x0054, + C2_REGS_HRX_CUR = 0x006C, +}; + +struct c2_adapter_pci_regs { + char reg_magic[8]; + u32 version; + u32 ivn; + u32 pci_window_size; + u32 q0_q_size; + u32 q0_msg_size; + u32 q0_pool_start; + u32 q0_shared; + u32 q1_q_size; + u32 q1_msg_size; + u32 q1_pool_start; + u32 q1_shared; + u32 q2_q_size; + u32 q2_msg_size; + u32 q2_pool_start; + u32 q2_shared; + u32 log_start; + u32 log_size; + u8 host_enaddr[8]; + u8 rdma_enaddr[8]; + u32 crash_entry; + u32 crash_ready[2]; + u32 fw_txd_cur; + u32 fw_hrxd_cur; + u32 fw_rxd_cur; +}; + +enum pci_regs { + C2_HISR = 0x0000, + C2_DISR = 0x0004, + C2_HIMR = 0x0008, + C2_DIMR = 0x000C, + C2_NISR0 = 0x0010, + C2_NISR1 = 0x0014, + C2_NIMR0 = 0x0018, + C2_NIMR1 = 0x001C, + C2_IDIS = 0x0020, +}; + +enum { + C2_PCI_HRX_INT = 1 << 8, + C2_PCI_HTX_INT = 1 << 17, + C2_PCI_HRX_QUI = 1 << 31, +}; + +/* + * Cepheus registers in BAR0. + */ +struct c2_pci_regs { + u32 hostisr; + u32 dmaisr; + u32 hostimr; + u32 dmaimr; + u32 netisr0; + u32 netisr1; + u32 netimr0; + u32 netimr1; + u32 int_disable; +}; + +/* TXP flags */ +enum c2_txp_flags { + TXP_HTXD_DONE = 0, + TXP_HTXD_READY = 1 << 0, + TXP_HTXD_UNINIT = 1 << 1, +}; + +/* RXP flags */ +enum c2_rxp_flags { + RXP_HRXD_UNINIT = 0, + RXP_HRXD_READY = 1 << 0, + RXP_HRXD_DONE = 1 << 1, +}; + +/* RXP status */ +enum c2_rxp_status { + RXP_HRXD_ZERO = 0, + RXP_HRXD_OK = 1 << 0, + RXP_HRXD_BUF_OV = 1 << 1, +}; + +/* TXP descriptor fields */ +enum txp_desc { + C2_TXP_FLAGS = 0x0000, + C2_TXP_LEN = 0x0002, + C2_TXP_ADDR = 0x0004, +}; + +/* RXP descriptor fields */ +enum rxp_desc { + C2_RXP_FLAGS = 0x0000, + C2_RXP_STATUS = 0x0002, + C2_RXP_COUNT = 0x0004, + C2_RXP_LEN = 0x0006, + C2_RXP_ADDR = 0x0008, +}; + +struct c2_txp_desc { + u16 flags; + u16 len; + u64 addr; +} __attribute__ ((packed)); + +struct c2_rxp_desc { + u16 flags; + u16 status; + u16 count; + u16 len; + u64 addr; +} __attribute__ ((packed)); + +struct c2_rxp_hdr { + u16 flags; + u16 status; + u16 len; + u16 rsvd; +} __attribute__ ((packed)); + +struct c2_tx_desc { + u32 len; + u32 status; + dma_addr_t next_offset; +}; + +struct c2_rx_desc { + u32 len; + u32 status; + dma_addr_t next_offset; +}; + +struct c2_alloc { + u32 last; + u32 max; + spinlock_t lock; + unsigned long *table; +}; + +struct c2_array { + struct { + void **page; + int used; + } *page_list; +}; + +/* + * The MQ shared pointer pool is organized as a linked list of + * chunks. Each chunk contains a linked list of free shared pointers + * that can be allocated to a given user mode client. + * + */ +struct sp_chunk { + struct sp_chunk *next; + dma_addr_t dma_addr; + DEFINE_DMA_UNMAP_ADDR(mapping); + u16 head; + u16 shared_ptr[0]; +}; + +struct c2_pd_table { + u32 last; + u32 max; + spinlock_t lock; + unsigned long *table; +}; + +struct c2_qp_table { + struct idr idr; + spinlock_t lock; + int last; +}; + +struct c2_element { + struct c2_element *next; + void *ht_desc; /* host descriptor */ + void __iomem *hw_desc; /* hardware descriptor */ + struct sk_buff *skb; + dma_addr_t mapaddr; + u32 maplen; +}; + +struct c2_ring { + struct c2_element *to_clean; + struct c2_element *to_use; + struct c2_element *start; + unsigned long count; +}; + +struct c2_dev { + struct ib_device ibdev; + void __iomem *regs; + void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */ + void __iomem *mmio_rxp_ring; + spinlock_t lock; + struct pci_dev *pcidev; + struct net_device *netdev; + struct net_device *pseudo_netdev; + unsigned int cur_tx; + unsigned int cur_rx; + u32 adapter_handle; + int device_cap_flags; + void __iomem *kva; /* KVA device memory */ + unsigned long pa; /* PA device memory */ + void **qptr_array; + + struct kmem_cache *host_msg_cache; + + struct list_head cca_link; /* adapter list */ + struct list_head eh_wakeup_list; /* event wakeup list */ + wait_queue_head_t req_vq_wo; + + /* Cached RNIC properties */ + struct ib_device_attr props; + + struct c2_pd_table pd_table; + struct c2_qp_table qp_table; + int ports; /* num of GigE ports */ + int devnum; + spinlock_t vqlock; /* sync vbs req MQ */ + + /* Verbs Queues */ + struct c2_mq req_vq; /* Verbs Request MQ */ + struct c2_mq rep_vq; /* Verbs Reply MQ */ + struct c2_mq aeq; /* Async Events MQ */ + + /* Kernel client MQs */ + struct sp_chunk *kern_mqsp_pool; + + /* Device updates these values when posting messages to a host + * target queue */ + u16 req_vq_shared; + u16 rep_vq_shared; + u16 aeq_shared; + u16 irq_claimed; + + /* + * Shared host target pages for user-accessible MQs. + */ + int hthead; /* index of first free entry */ + void *htpages; /* kernel vaddr */ + int htlen; /* length of htpages memory */ + void *htuva; /* user mapped vaddr */ + spinlock_t htlock; /* serialize allocation */ + + u64 adapter_hint_uva; /* access to the activity FIFO */ + + // spinlock_t aeq_lock; + // spinlock_t rnic_lock; + + __be16 *hint_count; + dma_addr_t hint_count_dma; + u16 hints_read; + + int init; /* TRUE if it's ready */ + char ae_cache_name[16]; + char vq_cache_name[16]; +}; + +struct c2_port { + u32 msg_enable; + struct c2_dev *c2dev; + struct net_device *netdev; + + spinlock_t tx_lock; + u32 tx_avail; + struct c2_ring tx_ring; + struct c2_ring rx_ring; + + void *mem; /* PCI memory for host rings */ + dma_addr_t dma; + unsigned long mem_size; + + u32 rx_buf_size; +}; + +/* + * Activity FIFO registers in BAR0. + */ +#define PCI_BAR0_HOST_HINT 0x100 +#define PCI_BAR0_ADAPTER_HINT 0x2000 + +/* + * Ammasso PCI vendor id and Cepheus PCI device id. + */ +#define CQ_ARMED 0x01 +#define CQ_WAIT_FOR_DMA 0x80 + +/* + * The format of a hint is as follows: + * Lower 16 bits are the count of hints for the queue. + * Next 15 bits are the qp_index + * Upper most bit depends on who reads it: + * If read by producer, then it means Full (1) or Not-Full (0) + * If read by consumer, then it means Empty (1) or Not-Empty (0) + */ +#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count) +#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16) +#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF) + + +/* + * The following defines the offset in SDRAM for the c2_adapter_pci_regs_t + * struct. + */ +#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000 + +#ifndef readq +static inline u64 readq(const void __iomem * addr) +{ + u64 ret = readl(addr + 4); + ret <<= 32; + ret |= readl(addr); + + return ret; +} +#endif + +#ifndef writeq +static inline void __raw_writeq(u64 val, void __iomem * addr) +{ + __raw_writel((u32) (val), addr); + __raw_writel((u32) (val >> 32), (addr + 4)); +} +#endif + +#define C2_SET_CUR_RX(c2dev, cur_rx) \ + __raw_writel((__force u32) cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092) + +#define C2_GET_CUR_RX(c2dev) \ + be32_to_cpu((__force __be32) readl(c2dev->mmio_txp_ring + 4092)) + +static inline struct c2_dev *to_c2dev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct c2_dev, ibdev); +} + +static inline int c2_errno(void *reply) +{ + switch (c2_wr_get_result(reply)) { + case C2_OK: + return 0; + case CCERR_NO_BUFS: + case CCERR_INSUFFICIENT_RESOURCES: + case CCERR_ZERO_RDMA_READ_RESOURCES: + return -ENOMEM; + case CCERR_MR_IN_USE: + case CCERR_QP_IN_USE: + return -EBUSY; + case CCERR_ADDR_IN_USE: + return -EADDRINUSE; + case CCERR_ADDR_NOT_AVAIL: + return -EADDRNOTAVAIL; + case CCERR_CONN_RESET: + return -ECONNRESET; + case CCERR_NOT_IMPLEMENTED: + case CCERR_INVALID_WQE: + return -ENOSYS; + case CCERR_QP_NOT_PRIVILEGED: + return -EPERM; + case CCERR_STACK_ERROR: + return -EPROTO; + case CCERR_ACCESS_VIOLATION: + case CCERR_BASE_AND_BOUNDS_VIOLATION: + return -EFAULT; + case CCERR_STAG_STATE_NOT_INVALID: + case CCERR_INVALID_ADDRESS: + case CCERR_INVALID_CQ: + case CCERR_INVALID_EP: + case CCERR_INVALID_MODIFIER: + case CCERR_INVALID_MTU: + case CCERR_INVALID_PD_ID: + case CCERR_INVALID_QP: + case CCERR_INVALID_RNIC: + case CCERR_INVALID_STAG: + return -EINVAL; + default: + return -EAGAIN; + } +} + +/* Device */ +extern int c2_register_device(struct c2_dev *c2dev); +extern void c2_unregister_device(struct c2_dev *c2dev); +extern int c2_rnic_init(struct c2_dev *c2dev); +extern void c2_rnic_term(struct c2_dev *c2dev); +extern void c2_rnic_interrupt(struct c2_dev *c2dev); +extern int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask); +extern int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask); + +/* QPs */ +extern int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd, + struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp); +extern void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp); +extern struct ib_qp *c2_get_qp(struct ib_device *device, int qpn); +extern int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp, + struct ib_qp_attr *attr, int attr_mask); +extern int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp, + int ord, int ird); +extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, + struct ib_send_wr **bad_wr); +extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, + struct ib_recv_wr **bad_wr); +extern void __devinit c2_init_qp_table(struct c2_dev *c2dev); +extern void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev); +extern void c2_set_qp_state(struct c2_qp *, int); +extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn); + +/* PDs */ +extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd); +extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd); +extern int __devinit c2_init_pd_table(struct c2_dev *c2dev); +extern void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev); + +/* CQs */ +extern int c2_init_cq(struct c2_dev *c2dev, int entries, + struct c2_ucontext *ctx, struct c2_cq *cq); +extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq); +extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index); +extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index); +extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); +extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); + +/* CM */ +extern int c2_llp_connect(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *iw_param); +extern int c2_llp_accept(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *iw_param); +extern int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, + u8 pdata_len); +extern int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog); +extern int c2_llp_service_destroy(struct iw_cm_id *cm_id); + +/* MM */ +extern int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list, + int page_size, int pbl_depth, u32 length, + u32 off, u64 *va, enum c2_acf acf, + struct c2_mr *mr); +extern int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index); + +/* AE */ +extern void c2_ae_event(struct c2_dev *c2dev, u32 mq_index); + +/* MQSP Allocator */ +extern int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask, + struct sp_chunk **root); +extern void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root); +extern __be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head, + dma_addr_t *dma_addr, gfp_t gfp_mask); +extern void c2_free_mqsp(__be16* mqsp); +#endif diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c new file mode 100644 index 00000000..32d34e88 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_ae.c @@ -0,0 +1,324 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "c2.h" +#include +#include "c2_status.h" +#include "c2_ae.h" + +static int c2_convert_cm_status(u32 c2_status) +{ + switch (c2_status) { + case C2_CONN_STATUS_SUCCESS: + return 0; + case C2_CONN_STATUS_REJECTED: + return -ENETRESET; + case C2_CONN_STATUS_REFUSED: + return -ECONNREFUSED; + case C2_CONN_STATUS_TIMEDOUT: + return -ETIMEDOUT; + case C2_CONN_STATUS_NETUNREACH: + return -ENETUNREACH; + case C2_CONN_STATUS_HOSTUNREACH: + return -EHOSTUNREACH; + case C2_CONN_STATUS_INVALID_RNIC: + return -EINVAL; + case C2_CONN_STATUS_INVALID_QP: + return -EINVAL; + case C2_CONN_STATUS_INVALID_QP_STATE: + return -EINVAL; + case C2_CONN_STATUS_ADDR_NOT_AVAIL: + return -EADDRNOTAVAIL; + default: + printk(KERN_ERR PFX + "%s - Unable to convert CM status: %d\n", + __func__, c2_status); + return -EIO; + } +} + +static const char* to_event_str(int event) +{ + static const char* event_str[] = { + "CCAE_REMOTE_SHUTDOWN", + "CCAE_ACTIVE_CONNECT_RESULTS", + "CCAE_CONNECTION_REQUEST", + "CCAE_LLP_CLOSE_COMPLETE", + "CCAE_TERMINATE_MESSAGE_RECEIVED", + "CCAE_LLP_CONNECTION_RESET", + "CCAE_LLP_CONNECTION_LOST", + "CCAE_LLP_SEGMENT_SIZE_INVALID", + "CCAE_LLP_INVALID_CRC", + "CCAE_LLP_BAD_FPDU", + "CCAE_INVALID_DDP_VERSION", + "CCAE_INVALID_RDMA_VERSION", + "CCAE_UNEXPECTED_OPCODE", + "CCAE_INVALID_DDP_QUEUE_NUMBER", + "CCAE_RDMA_READ_NOT_ENABLED", + "CCAE_RDMA_WRITE_NOT_ENABLED", + "CCAE_RDMA_READ_TOO_SMALL", + "CCAE_NO_L_BIT", + "CCAE_TAGGED_INVALID_STAG", + "CCAE_TAGGED_BASE_BOUNDS_VIOLATION", + "CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION", + "CCAE_TAGGED_INVALID_PD", + "CCAE_WRAP_ERROR", + "CCAE_BAD_CLOSE", + "CCAE_BAD_LLP_CLOSE", + "CCAE_INVALID_MSN_RANGE", + "CCAE_INVALID_MSN_GAP", + "CCAE_IRRQ_OVERFLOW", + "CCAE_IRRQ_MSN_GAP", + "CCAE_IRRQ_MSN_RANGE", + "CCAE_IRRQ_INVALID_STAG", + "CCAE_IRRQ_BASE_BOUNDS_VIOLATION", + "CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION", + "CCAE_IRRQ_INVALID_PD", + "CCAE_IRRQ_WRAP_ERROR", + "CCAE_CQ_SQ_COMPLETION_OVERFLOW", + "CCAE_CQ_RQ_COMPLETION_ERROR", + "CCAE_QP_SRQ_WQE_ERROR", + "CCAE_QP_LOCAL_CATASTROPHIC_ERROR", + "CCAE_CQ_OVERFLOW", + "CCAE_CQ_OPERATION_ERROR", + "CCAE_SRQ_LIMIT_REACHED", + "CCAE_QP_RQ_LIMIT_REACHED", + "CCAE_SRQ_CATASTROPHIC_ERROR", + "CCAE_RNIC_CATASTROPHIC_ERROR" + }; + + if (event < CCAE_REMOTE_SHUTDOWN || + event > CCAE_RNIC_CATASTROPHIC_ERROR) + return ""; + + event -= CCAE_REMOTE_SHUTDOWN; + return event_str[event]; +} + +static const char *to_qp_state_str(int state) +{ + switch (state) { + case C2_QP_STATE_IDLE: + return "C2_QP_STATE_IDLE"; + case C2_QP_STATE_CONNECTING: + return "C2_QP_STATE_CONNECTING"; + case C2_QP_STATE_RTS: + return "C2_QP_STATE_RTS"; + case C2_QP_STATE_CLOSING: + return "C2_QP_STATE_CLOSING"; + case C2_QP_STATE_TERMINATE: + return "C2_QP_STATE_TERMINATE"; + case C2_QP_STATE_ERROR: + return "C2_QP_STATE_ERROR"; + default: + return ""; + }; +} + +void c2_ae_event(struct c2_dev *c2dev, u32 mq_index) +{ + struct c2_mq *mq = c2dev->qptr_array[mq_index]; + union c2wr *wr; + void *resource_user_context; + struct iw_cm_event cm_event; + struct ib_event ib_event; + enum c2_resource_indicator resource_indicator; + enum c2_event_id event_id; + unsigned long flags; + int status; + + /* + * retrieve the message + */ + wr = c2_mq_consume(mq); + if (!wr) + return; + + memset(&ib_event, 0, sizeof(ib_event)); + memset(&cm_event, 0, sizeof(cm_event)); + + event_id = c2_wr_get_id(wr); + resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type); + resource_user_context = + (void *) (unsigned long) wr->ae.ae_generic.user_context; + + status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr)); + + pr_debug("event received c2_dev=%p, event_id=%d, " + "resource_indicator=%d, user_context=%p, status = %d\n", + c2dev, event_id, resource_indicator, resource_user_context, + status); + + switch (resource_indicator) { + case C2_RES_IND_QP:{ + + struct c2_qp *qp = (struct c2_qp *)resource_user_context; + struct iw_cm_id *cm_id = qp->cm_id; + struct c2wr_ae_active_connect_results *res; + + if (!cm_id) { + pr_debug("event received, but cm_id is , qp=%p!\n", + qp); + goto ignore_it; + } + pr_debug("%s: event = %s, user_context=%llx, " + "resource_type=%x, " + "resource=%x, qp_state=%s\n", + __func__, + to_event_str(event_id), + (unsigned long long) wr->ae.ae_generic.user_context, + be32_to_cpu(wr->ae.ae_generic.resource_type), + be32_to_cpu(wr->ae.ae_generic.resource), + to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state))); + + c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state)); + + switch (event_id) { + case CCAE_ACTIVE_CONNECT_RESULTS: + res = &wr->ae.ae_active_connect_results; + cm_event.event = IW_CM_EVENT_CONNECT_REPLY; + cm_event.local_addr.sin_addr.s_addr = res->laddr; + cm_event.remote_addr.sin_addr.s_addr = res->raddr; + cm_event.local_addr.sin_port = res->lport; + cm_event.remote_addr.sin_port = res->rport; + if (status == 0) { + cm_event.private_data_len = + be32_to_cpu(res->private_data_length); + cm_event.private_data = res->private_data; + } else { + spin_lock_irqsave(&qp->lock, flags); + if (qp->cm_id) { + qp->cm_id->rem_ref(qp->cm_id); + qp->cm_id = NULL; + } + spin_unlock_irqrestore(&qp->lock, flags); + cm_event.private_data_len = 0; + cm_event.private_data = NULL; + } + if (cm_id->event_handler) + cm_id->event_handler(cm_id, &cm_event); + break; + case CCAE_TERMINATE_MESSAGE_RECEIVED: + case CCAE_CQ_SQ_COMPLETION_OVERFLOW: + ib_event.device = &c2dev->ibdev; + ib_event.element.qp = &qp->ibqp; + ib_event.event = IB_EVENT_QP_REQ_ERR; + + if (qp->ibqp.event_handler) + qp->ibqp.event_handler(&ib_event, + qp->ibqp. + qp_context); + break; + case CCAE_BAD_CLOSE: + case CCAE_LLP_CLOSE_COMPLETE: + case CCAE_LLP_CONNECTION_RESET: + case CCAE_LLP_CONNECTION_LOST: + BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b); + + spin_lock_irqsave(&qp->lock, flags); + if (qp->cm_id) { + qp->cm_id->rem_ref(qp->cm_id); + qp->cm_id = NULL; + } + spin_unlock_irqrestore(&qp->lock, flags); + cm_event.event = IW_CM_EVENT_CLOSE; + cm_event.status = 0; + if (cm_id->event_handler) + cm_id->event_handler(cm_id, &cm_event); + break; + default: + BUG_ON(1); + pr_debug("%s:%d Unexpected event_id=%d on QP=%p, " + "CM_ID=%p\n", + __func__, __LINE__, + event_id, qp, cm_id); + break; + } + break; + } + + case C2_RES_IND_EP:{ + + struct c2wr_ae_connection_request *req = + &wr->ae.ae_connection_request; + struct iw_cm_id *cm_id = + (struct iw_cm_id *)resource_user_context; + + pr_debug("C2_RES_IND_EP event_id=%d\n", event_id); + if (event_id != CCAE_CONNECTION_REQUEST) { + pr_debug("%s: Invalid event_id: %d\n", + __func__, event_id); + break; + } + cm_event.event = IW_CM_EVENT_CONNECT_REQUEST; + cm_event.provider_data = (void*)(unsigned long)req->cr_handle; + cm_event.local_addr.sin_addr.s_addr = req->laddr; + cm_event.remote_addr.sin_addr.s_addr = req->raddr; + cm_event.local_addr.sin_port = req->lport; + cm_event.remote_addr.sin_port = req->rport; + cm_event.private_data_len = + be32_to_cpu(req->private_data_length); + cm_event.private_data = req->private_data; + /* + * Until ird/ord negotiation via MPAv2 support is added, send + * max supported values + */ + cm_event.ird = cm_event.ord = 128; + + if (cm_id->event_handler) + cm_id->event_handler(cm_id, &cm_event); + break; + } + + case C2_RES_IND_CQ:{ + struct c2_cq *cq = + (struct c2_cq *) resource_user_context; + + pr_debug("IB_EVENT_CQ_ERR\n"); + ib_event.device = &c2dev->ibdev; + ib_event.element.cq = &cq->ibcq; + ib_event.event = IB_EVENT_CQ_ERR; + + if (cq->ibcq.event_handler) + cq->ibcq.event_handler(&ib_event, + cq->ibcq.cq_context); + } + + default: + printk("Bad resource indicator = %d\n", + resource_indicator); + break; + } + + ignore_it: + c2_mq_free(mq); +} diff --git a/drivers/infiniband/hw/amso1100/c2_ae.h b/drivers/infiniband/hw/amso1100/c2_ae.h new file mode 100644 index 00000000..3a065c33 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_ae.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _C2_AE_H_ +#define _C2_AE_H_ + +/* + * WARNING: If you change this file, also bump C2_IVN_BASE + * in common/include/clustercore/c2_ivn.h. + */ + +/* + * Asynchronous Event Identifiers + * + * These start at 0x80 only so it's obvious from inspection that + * they are not work-request statuses. This isn't critical. + * + * NOTE: these event id's must fit in eight bits. + */ +enum c2_event_id { + CCAE_REMOTE_SHUTDOWN = 0x80, + CCAE_ACTIVE_CONNECT_RESULTS, + CCAE_CONNECTION_REQUEST, + CCAE_LLP_CLOSE_COMPLETE, + CCAE_TERMINATE_MESSAGE_RECEIVED, + CCAE_LLP_CONNECTION_RESET, + CCAE_LLP_CONNECTION_LOST, + CCAE_LLP_SEGMENT_SIZE_INVALID, + CCAE_LLP_INVALID_CRC, + CCAE_LLP_BAD_FPDU, + CCAE_INVALID_DDP_VERSION, + CCAE_INVALID_RDMA_VERSION, + CCAE_UNEXPECTED_OPCODE, + CCAE_INVALID_DDP_QUEUE_NUMBER, + CCAE_RDMA_READ_NOT_ENABLED, + CCAE_RDMA_WRITE_NOT_ENABLED, + CCAE_RDMA_READ_TOO_SMALL, + CCAE_NO_L_BIT, + CCAE_TAGGED_INVALID_STAG, + CCAE_TAGGED_BASE_BOUNDS_VIOLATION, + CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION, + CCAE_TAGGED_INVALID_PD, + CCAE_WRAP_ERROR, + CCAE_BAD_CLOSE, + CCAE_BAD_LLP_CLOSE, + CCAE_INVALID_MSN_RANGE, + CCAE_INVALID_MSN_GAP, + CCAE_IRRQ_OVERFLOW, + CCAE_IRRQ_MSN_GAP, + CCAE_IRRQ_MSN_RANGE, + CCAE_IRRQ_INVALID_STAG, + CCAE_IRRQ_BASE_BOUNDS_VIOLATION, + CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION, + CCAE_IRRQ_INVALID_PD, + CCAE_IRRQ_WRAP_ERROR, + CCAE_CQ_SQ_COMPLETION_OVERFLOW, + CCAE_CQ_RQ_COMPLETION_ERROR, + CCAE_QP_SRQ_WQE_ERROR, + CCAE_QP_LOCAL_CATASTROPHIC_ERROR, + CCAE_CQ_OVERFLOW, + CCAE_CQ_OPERATION_ERROR, + CCAE_SRQ_LIMIT_REACHED, + CCAE_QP_RQ_LIMIT_REACHED, + CCAE_SRQ_CATASTROPHIC_ERROR, + CCAE_RNIC_CATASTROPHIC_ERROR +/* WARNING If you add more id's, make sure their values fit in eight bits. */ +}; + +/* + * Resource Indicators and Identifiers + */ +enum c2_resource_indicator { + C2_RES_IND_QP = 1, + C2_RES_IND_EP, + C2_RES_IND_CQ, + C2_RES_IND_SRQ, +}; + +#endif /* _C2_AE_H_ */ diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c new file mode 100644 index 00000000..78d247ec --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_alloc.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "c2.h" + +static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask, + struct sp_chunk **head) +{ + int i; + struct sp_chunk *new_head; + dma_addr_t dma_addr; + + new_head = dma_alloc_coherent(&c2dev->pcidev->dev, PAGE_SIZE, + &dma_addr, gfp_mask); + if (new_head == NULL) + return -ENOMEM; + + new_head->dma_addr = dma_addr; + dma_unmap_addr_set(new_head, mapping, new_head->dma_addr); + + new_head->next = NULL; + new_head->head = 0; + + /* build list where each index is the next free slot */ + for (i = 0; + i < (PAGE_SIZE - sizeof(struct sp_chunk) - + sizeof(u16)) / sizeof(u16) - 1; + i++) { + new_head->shared_ptr[i] = i + 1; + } + /* terminate list */ + new_head->shared_ptr[i] = 0xFFFF; + + *head = new_head; + return 0; +} + +int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask, + struct sp_chunk **root) +{ + return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root); +} + +void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root) +{ + struct sp_chunk *next; + + while (root) { + next = root->next; + dma_free_coherent(&c2dev->pcidev->dev, PAGE_SIZE, root, + dma_unmap_addr(root, mapping)); + root = next; + } +} + +__be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head, + dma_addr_t *dma_addr, gfp_t gfp_mask) +{ + u16 mqsp; + + while (head) { + mqsp = head->head; + if (mqsp != 0xFFFF) { + head->head = head->shared_ptr[mqsp]; + break; + } else if (head->next == NULL) { + if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) == + 0) { + head = head->next; + mqsp = head->head; + head->head = head->shared_ptr[mqsp]; + break; + } else + return NULL; + } else + head = head->next; + } + if (head) { + *dma_addr = head->dma_addr + + ((unsigned long) &(head->shared_ptr[mqsp]) - + (unsigned long) head); + pr_debug("%s addr %p dma_addr %llx\n", __func__, + &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr); + return (__force __be16 *) &(head->shared_ptr[mqsp]); + } + return NULL; +} + +void c2_free_mqsp(__be16 *mqsp) +{ + struct sp_chunk *head; + u16 idx; + + /* The chunk containing this ptr begins at the page boundary */ + head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK); + + /* Link head to new mqsp */ + *mqsp = (__force __be16) head->head; + + /* Compute the shared_ptr index */ + idx = ((unsigned long) mqsp & ~PAGE_MASK) >> 1; + idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1; + + /* Point this index at the head */ + head->shared_ptr[idx] = head->head; + + /* Point head at this index */ + head->head = idx; +} diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c new file mode 100644 index 00000000..95f58ab1 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_cm.c @@ -0,0 +1,453 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include + +#include "c2.h" +#include "c2_wr.h" +#include "c2_vq.h" +#include + +int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + struct c2_dev *c2dev = to_c2dev(cm_id->device); + struct ib_qp *ibqp; + struct c2_qp *qp; + struct c2wr_qp_connect_req *wr; /* variable size needs a malloc. */ + struct c2_vq_req *vq_req; + int err; + + ibqp = c2_get_qp(cm_id->device, iw_param->qpn); + if (!ibqp) + return -EINVAL; + qp = to_c2qp(ibqp); + + /* Associate QP <--> CM_ID */ + cm_id->provider_data = qp; + cm_id->add_ref(cm_id); + qp->cm_id = cm_id; + + /* + * only support the max private_data length + */ + if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) { + err = -EINVAL; + goto bail0; + } + /* + * Set the rdma read limits + */ + err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird); + if (err) + goto bail0; + + /* + * Create and send a WR_QP_CONNECT... + */ + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + err = -ENOMEM; + goto bail0; + } + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + err = -ENOMEM; + goto bail1; + } + + c2_wr_set_id(wr, CCWR_QP_CONNECT); + wr->hdr.context = 0; + wr->rnic_handle = c2dev->adapter_handle; + wr->qp_handle = qp->adapter_handle; + + wr->remote_addr = cm_id->remote_addr.sin_addr.s_addr; + wr->remote_port = cm_id->remote_addr.sin_port; + + /* + * Move any private data from the callers's buf into + * the WR. + */ + if (iw_param->private_data) { + wr->private_data_length = + cpu_to_be32(iw_param->private_data_len); + memcpy(&wr->private_data[0], iw_param->private_data, + iw_param->private_data_len); + } else + wr->private_data_length = 0; + + /* + * Send WR to adapter. NOTE: There is no synch reply from + * the adapter. + */ + err = vq_send_wr(c2dev, (union c2wr *) wr); + vq_req_free(c2dev, vq_req); + + bail1: + kfree(wr); + bail0: + if (err) { + /* + * If we fail, release reference on QP and + * disassociate QP from CM_ID + */ + cm_id->provider_data = NULL; + qp->cm_id = NULL; + cm_id->rem_ref(cm_id); + } + return err; +} + +int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog) +{ + struct c2_dev *c2dev; + struct c2wr_ep_listen_create_req wr; + struct c2wr_ep_listen_create_rep *reply; + struct c2_vq_req *vq_req; + int err; + + c2dev = to_c2dev(cm_id->device); + if (c2dev == NULL) + return -EINVAL; + + /* + * Allocate verbs request. + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + /* + * Build the WR + */ + c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE); + wr.hdr.context = (u64) (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.local_addr = cm_id->local_addr.sin_addr.s_addr; + wr.local_port = cm_id->local_addr.sin_port; + wr.backlog = cpu_to_be32(backlog); + wr.user_context = (u64) (unsigned long) cm_id; + + /* + * Reference the request struct. Dereferenced in the int handler. + */ + vq_req_get(c2dev, vq_req); + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + /* + * Wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail0; + + /* + * Process reply + */ + reply = + (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail1; + } + + if ((err = c2_errno(reply)) != 0) + goto bail1; + + /* + * Keep the adapter handle. Used in subsequent destroy + */ + cm_id->provider_data = (void*)(unsigned long) reply->ep_handle; + + /* + * free vq stuff + */ + vq_repbuf_free(c2dev, reply); + vq_req_free(c2dev, vq_req); + + return 0; + + bail1: + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + + +int c2_llp_service_destroy(struct iw_cm_id *cm_id) +{ + + struct c2_dev *c2dev; + struct c2wr_ep_listen_destroy_req wr; + struct c2wr_ep_listen_destroy_rep *reply; + struct c2_vq_req *vq_req; + int err; + + c2dev = to_c2dev(cm_id->device); + if (c2dev == NULL) + return -EINVAL; + + /* + * Allocate verbs request. + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + /* + * Build the WR + */ + c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.ep_handle = (u32)(unsigned long)cm_id->provider_data; + + /* + * reference the request struct. dereferenced in the int handler. + */ + vq_req_get(c2dev, vq_req); + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + /* + * Wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail0; + + /* + * Process reply + */ + reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + if ((err = c2_errno(reply)) != 0) + goto bail1; + + bail1: + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + struct c2_dev *c2dev = to_c2dev(cm_id->device); + struct c2_qp *qp; + struct ib_qp *ibqp; + struct c2wr_cr_accept_req *wr; /* variable length WR */ + struct c2_vq_req *vq_req; + struct c2wr_cr_accept_rep *reply; /* VQ Reply msg ptr. */ + int err; + + ibqp = c2_get_qp(cm_id->device, iw_param->qpn); + if (!ibqp) + return -EINVAL; + qp = to_c2qp(ibqp); + + /* Set the RDMA read limits */ + err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird); + if (err) + goto bail0; + + /* Allocate verbs request. */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + err = -ENOMEM; + goto bail0; + } + vq_req->qp = qp; + vq_req->cm_id = cm_id; + vq_req->event = IW_CM_EVENT_ESTABLISHED; + + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + err = -ENOMEM; + goto bail1; + } + + /* Build the WR */ + c2_wr_set_id(wr, CCWR_CR_ACCEPT); + wr->hdr.context = (unsigned long) vq_req; + wr->rnic_handle = c2dev->adapter_handle; + wr->ep_handle = (u32) (unsigned long) cm_id->provider_data; + wr->qp_handle = qp->adapter_handle; + + /* Replace the cr_handle with the QP after accept */ + cm_id->provider_data = qp; + cm_id->add_ref(cm_id); + qp->cm_id = cm_id; + + cm_id->provider_data = qp; + + /* Validate private_data length */ + if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) { + err = -EINVAL; + goto bail1; + } + + if (iw_param->private_data) { + wr->private_data_length = cpu_to_be32(iw_param->private_data_len); + memcpy(&wr->private_data[0], + iw_param->private_data, iw_param->private_data_len); + } else + wr->private_data_length = 0; + + /* Reference the request struct. Dereferenced in the int handler. */ + vq_req_get(c2dev, vq_req); + + /* Send WR to adapter */ + err = vq_send_wr(c2dev, (union c2wr *) wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + /* Wait for reply from adapter */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail1; + + /* Check that reply is present */ + reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail1; + } + + err = c2_errno(reply); + vq_repbuf_free(c2dev, reply); + + if (!err) + c2_set_qp_state(qp, C2_QP_STATE_RTS); + bail1: + kfree(wr); + vq_req_free(c2dev, vq_req); + bail0: + if (err) { + /* + * If we fail, release reference on QP and + * disassociate QP from CM_ID + */ + cm_id->provider_data = NULL; + qp->cm_id = NULL; + cm_id->rem_ref(cm_id); + } + return err; +} + +int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ + struct c2_dev *c2dev; + struct c2wr_cr_reject_req wr; + struct c2_vq_req *vq_req; + struct c2wr_cr_reject_rep *reply; + int err; + + c2dev = to_c2dev(cm_id->device); + + /* + * Allocate verbs request. + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + /* + * Build the WR + */ + c2_wr_set_id(&wr, CCWR_CR_REJECT); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.ep_handle = (u32) (unsigned long) cm_id->provider_data; + + /* + * reference the request struct. dereferenced in the int handler. + */ + vq_req_get(c2dev, vq_req); + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + /* + * Wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail0; + + /* + * Process reply + */ + reply = (struct c2wr_cr_reject_rep *) (unsigned long) + vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + err = c2_errno(reply); + /* + * free vq stuff + */ + vq_repbuf_free(c2dev, reply); + + bail0: + vq_req_free(c2dev, vq_req); + return err; +} diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c new file mode 100644 index 00000000..49e0e853 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_cq.c @@ -0,0 +1,437 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include + +#include "c2.h" +#include "c2_vq.h" +#include "c2_status.h" + +#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1)) + +static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn) +{ + struct c2_cq *cq; + unsigned long flags; + + spin_lock_irqsave(&c2dev->lock, flags); + cq = c2dev->qptr_array[cqn]; + if (!cq) { + spin_unlock_irqrestore(&c2dev->lock, flags); + return NULL; + } + atomic_inc(&cq->refcount); + spin_unlock_irqrestore(&c2dev->lock, flags); + return cq; +} + +static void c2_cq_put(struct c2_cq *cq) +{ + if (atomic_dec_and_test(&cq->refcount)) + wake_up(&cq->wait); +} + +void c2_cq_event(struct c2_dev *c2dev, u32 mq_index) +{ + struct c2_cq *cq; + + cq = c2_cq_get(c2dev, mq_index); + if (!cq) { + printk("discarding events on destroyed CQN=%d\n", mq_index); + return; + } + + (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); + c2_cq_put(cq); +} + +void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index) +{ + struct c2_cq *cq; + struct c2_mq *q; + + cq = c2_cq_get(c2dev, mq_index); + if (!cq) + return; + + spin_lock_irq(&cq->lock); + q = &cq->mq; + if (q && !c2_mq_empty(q)) { + u16 priv = q->priv; + struct c2wr_ce *msg; + + while (priv != be16_to_cpu(*q->shared)) { + msg = (struct c2wr_ce *) + (q->msg_pool.host + priv * q->msg_size); + if (msg->qp_user_context == (u64) (unsigned long) qp) { + msg->qp_user_context = (u64) 0; + } + priv = (priv + 1) % q->q_size; + } + } + spin_unlock_irq(&cq->lock); + c2_cq_put(cq); +} + +static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status) +{ + switch (status) { + case C2_OK: + return IB_WC_SUCCESS; + case CCERR_FLUSHED: + return IB_WC_WR_FLUSH_ERR; + case CCERR_BASE_AND_BOUNDS_VIOLATION: + return IB_WC_LOC_PROT_ERR; + case CCERR_ACCESS_VIOLATION: + return IB_WC_LOC_ACCESS_ERR; + case CCERR_TOTAL_LENGTH_TOO_BIG: + return IB_WC_LOC_LEN_ERR; + case CCERR_INVALID_WINDOW: + return IB_WC_MW_BIND_ERR; + default: + return IB_WC_GENERAL_ERR; + } +} + + +static inline int c2_poll_one(struct c2_dev *c2dev, + struct c2_cq *cq, struct ib_wc *entry) +{ + struct c2wr_ce *ce; + struct c2_qp *qp; + int is_recv = 0; + + ce = c2_mq_consume(&cq->mq); + if (!ce) { + return -EAGAIN; + } + + /* + * if the qp returned is null then this qp has already + * been freed and we are unable process the completion. + * try pulling the next message + */ + while ((qp = + (struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) { + c2_mq_free(&cq->mq); + ce = c2_mq_consume(&cq->mq); + if (!ce) + return -EAGAIN; + } + + entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce)); + entry->wr_id = ce->hdr.context; + entry->qp = &qp->ibqp; + entry->wc_flags = 0; + entry->slid = 0; + entry->sl = 0; + entry->src_qp = 0; + entry->dlid_path_bits = 0; + entry->pkey_index = 0; + + switch (c2_wr_get_id(ce)) { + case C2_WR_TYPE_SEND: + entry->opcode = IB_WC_SEND; + break; + case C2_WR_TYPE_RDMA_WRITE: + entry->opcode = IB_WC_RDMA_WRITE; + break; + case C2_WR_TYPE_RDMA_READ: + entry->opcode = IB_WC_RDMA_READ; + break; + case C2_WR_TYPE_BIND_MW: + entry->opcode = IB_WC_BIND_MW; + break; + case C2_WR_TYPE_RECV: + entry->byte_len = be32_to_cpu(ce->bytes_rcvd); + entry->opcode = IB_WC_RECV; + is_recv = 1; + break; + default: + break; + } + + /* consume the WQEs */ + if (is_recv) + c2_mq_lconsume(&qp->rq_mq, 1); + else + c2_mq_lconsume(&qp->sq_mq, + be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1); + + /* free the message */ + c2_mq_free(&cq->mq); + + return 0; +} + +int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) +{ + struct c2_dev *c2dev = to_c2dev(ibcq->device); + struct c2_cq *cq = to_c2cq(ibcq); + unsigned long flags; + int npolled, err; + + spin_lock_irqsave(&cq->lock, flags); + + for (npolled = 0; npolled < num_entries; ++npolled) { + + err = c2_poll_one(c2dev, cq, entry + npolled); + if (err) + break; + } + + spin_unlock_irqrestore(&cq->lock, flags); + + return npolled; +} + +int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) +{ + struct c2_mq_shared __iomem *shared; + struct c2_cq *cq; + unsigned long flags; + int ret = 0; + + cq = to_c2cq(ibcq); + shared = cq->mq.peer; + + if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP) + writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type); + else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) + writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type); + else + return -EINVAL; + + writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed); + + /* + * Now read back shared->armed to make the PCI + * write synchronous. This is necessary for + * correct cq notification semantics. + */ + readb(&shared->armed); + + if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + spin_lock_irqsave(&cq->lock, flags); + ret = !c2_mq_empty(&cq->mq); + spin_unlock_irqrestore(&cq->lock, flags); + } + + return ret; +} + +static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq) +{ + dma_free_coherent(&c2dev->pcidev->dev, mq->q_size * mq->msg_size, + mq->msg_pool.host, dma_unmap_addr(mq, mapping)); +} + +static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size, + int msg_size) +{ + u8 *pool_start; + + pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size, + &mq->host_dma, GFP_KERNEL); + if (!pool_start) + return -ENOMEM; + + c2_mq_rep_init(mq, + 0, /* index (currently unknown) */ + q_size, + msg_size, + pool_start, + NULL, /* peer (currently unknown) */ + C2_MQ_HOST_TARGET); + + dma_unmap_addr_set(mq, mapping, mq->host_dma); + + return 0; +} + +int c2_init_cq(struct c2_dev *c2dev, int entries, + struct c2_ucontext *ctx, struct c2_cq *cq) +{ + struct c2wr_cq_create_req wr; + struct c2wr_cq_create_rep *reply; + unsigned long peer_pa; + struct c2_vq_req *vq_req; + int err; + + might_sleep(); + + cq->ibcq.cqe = entries - 1; + cq->is_kernel = !ctx; + + /* Allocate a shared pointer */ + cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &cq->mq.shared_dma, GFP_KERNEL); + if (!cq->mq.shared) + return -ENOMEM; + + /* Allocate pages for the message pool */ + err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE); + if (err) + goto bail0; + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + err = -ENOMEM; + goto bail1; + } + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_CQ_CREATE); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.msg_size = cpu_to_be32(cq->mq.msg_size); + wr.depth = cpu_to_be32(cq->mq.q_size); + wr.shared_ht = cpu_to_be64(cq->mq.shared_dma); + wr.msg_pool = cpu_to_be64(cq->mq.host_dma); + wr.user_context = (u64) (unsigned long) (cq); + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail2; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail2; + + reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail2; + } + + if ((err = c2_errno(reply)) != 0) + goto bail3; + + cq->adapter_handle = reply->cq_handle; + cq->mq.index = be32_to_cpu(reply->mq_index); + + peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared); + cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE); + if (!cq->mq.peer) { + err = -ENOMEM; + goto bail3; + } + + vq_repbuf_free(c2dev, reply); + vq_req_free(c2dev, vq_req); + + spin_lock_init(&cq->lock); + atomic_set(&cq->refcount, 1); + init_waitqueue_head(&cq->wait); + + /* + * Use the MQ index allocated by the adapter to + * store the CQ in the qptr_array + */ + cq->cqn = cq->mq.index; + c2dev->qptr_array[cq->cqn] = cq; + + return 0; + + bail3: + vq_repbuf_free(c2dev, reply); + bail2: + vq_req_free(c2dev, vq_req); + bail1: + c2_free_cq_buf(c2dev, &cq->mq); + bail0: + c2_free_mqsp(cq->mq.shared); + + return err; +} + +void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq) +{ + int err; + struct c2_vq_req *vq_req; + struct c2wr_cq_destroy_req wr; + struct c2wr_cq_destroy_rep *reply; + + might_sleep(); + + /* Clear CQ from the qptr array */ + spin_lock_irq(&c2dev->lock); + c2dev->qptr_array[cq->mq.index] = NULL; + atomic_dec(&cq->refcount); + spin_unlock_irq(&c2dev->lock); + + wait_event(cq->wait, !atomic_read(&cq->refcount)); + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + goto bail0; + } + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_CQ_DESTROY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.cq_handle = cq->adapter_handle; + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail1; + + reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg); + if (reply) + vq_repbuf_free(c2dev, reply); + bail1: + vq_req_free(c2dev, vq_req); + bail0: + if (cq->is_kernel) { + c2_free_cq_buf(c2dev, &cq->mq); + } + + return; +} diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c new file mode 100644 index 00000000..8951db4a --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_intr.c @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "c2.h" +#include +#include "c2_vq.h" + +static void handle_mq(struct c2_dev *c2dev, u32 index); +static void handle_vq(struct c2_dev *c2dev, u32 mq_index); + +/* + * Handle RNIC interrupts + */ +void c2_rnic_interrupt(struct c2_dev *c2dev) +{ + unsigned int mq_index; + + while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) { + mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT); + if (mq_index & 0x80000000) { + break; + } + + c2dev->hints_read++; + handle_mq(c2dev, mq_index); + } + +} + +/* + * Top level MQ handler + */ +static void handle_mq(struct c2_dev *c2dev, u32 mq_index) +{ + if (c2dev->qptr_array[mq_index] == NULL) { + pr_debug("handle_mq: stray activity for mq_index=%d\n", + mq_index); + return; + } + + switch (mq_index) { + case (0): + /* + * An index of 0 in the activity queue + * indicates the req vq now has messages + * available... + * + * Wake up any waiters waiting on req VQ + * message availability. + */ + wake_up(&c2dev->req_vq_wo); + break; + case (1): + handle_vq(c2dev, mq_index); + break; + case (2): + /* We have to purge the VQ in case there are pending + * accept reply requests that would result in the + * generation of an ESTABLISHED event. If we don't + * generate these first, a CLOSE event could end up + * being delivered before the ESTABLISHED event. + */ + handle_vq(c2dev, 1); + + c2_ae_event(c2dev, mq_index); + break; + default: + /* There is no event synchronization between CQ events + * and AE or CM events. In fact, CQE could be + * delivered for all of the I/O up to and including the + * FLUSH for a peer disconenct prior to the ESTABLISHED + * event being delivered to the app. The reason for this + * is that CM events are delivered on a thread, while AE + * and CM events are delivered on interrupt context. + */ + c2_cq_event(c2dev, mq_index); + break; + } + + return; +} + +/* + * Handles verbs WR replies. + */ +static void handle_vq(struct c2_dev *c2dev, u32 mq_index) +{ + void *adapter_msg, *reply_msg; + struct c2wr_hdr *host_msg; + struct c2wr_hdr tmp; + struct c2_mq *reply_vq; + struct c2_vq_req *req; + struct iw_cm_event cm_event; + int err; + + reply_vq = (struct c2_mq *) c2dev->qptr_array[mq_index]; + + /* + * get next msg from mq_index into adapter_msg. + * don't free it yet. + */ + adapter_msg = c2_mq_consume(reply_vq); + if (adapter_msg == NULL) { + return; + } + + host_msg = vq_repbuf_alloc(c2dev); + + /* + * If we can't get a host buffer, then we'll still + * wakeup the waiter, we just won't give him the msg. + * It is assumed the waiter will deal with this... + */ + if (!host_msg) { + pr_debug("handle_vq: no repbufs!\n"); + + /* + * just copy the WR header into a local variable. + * this allows us to still demux on the context + */ + host_msg = &tmp; + memcpy(host_msg, adapter_msg, sizeof(tmp)); + reply_msg = NULL; + } else { + memcpy(host_msg, adapter_msg, reply_vq->msg_size); + reply_msg = host_msg; + } + + /* + * consume the msg from the MQ + */ + c2_mq_free(reply_vq); + + /* + * wakeup the waiter. + */ + req = (struct c2_vq_req *) (unsigned long) host_msg->context; + if (req == NULL) { + /* + * We should never get here, as the adapter should + * never send us a reply that we're not expecting. + */ + vq_repbuf_free(c2dev, host_msg); + pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n"); + return; + } + + if (reply_msg) + err = c2_errno(reply_msg); + else + err = -ENOMEM; + + if (!err) switch (req->event) { + case IW_CM_EVENT_ESTABLISHED: + c2_set_qp_state(req->qp, + C2_QP_STATE_RTS); + /* + * Until ird/ord negotiation via MPAv2 support is added, send + * max supported values + */ + cm_event.ird = cm_event.ord = 128; + case IW_CM_EVENT_CLOSE: + + /* + * Move the QP to RTS if this is + * the established event + */ + cm_event.event = req->event; + cm_event.status = 0; + cm_event.local_addr = req->cm_id->local_addr; + cm_event.remote_addr = req->cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + req->cm_id->event_handler(req->cm_id, &cm_event); + break; + default: + break; + } + + req->reply_msg = (u64) (unsigned long) (reply_msg); + atomic_set(&req->reply_ready, 1); + wake_up(&req->wait_object); + + /* + * If the request was cancelled, then this put will + * free the vq_req memory...and reply_msg!!! + */ + vq_req_put(c2dev, req); +} diff --git a/drivers/infiniband/hw/amso1100/c2_mm.c b/drivers/infiniband/hw/amso1100/c2_mm.c new file mode 100644 index 00000000..119c4f3d --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_mm.c @@ -0,0 +1,377 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include + +#include "c2.h" +#include "c2_vq.h" + +#define PBL_VIRT 1 +#define PBL_PHYS 2 + +/* + * Send all the PBL messages to convey the remainder of the PBL + * Wait for the adapter's reply on the last one. + * This is indicated by setting the MEM_PBL_COMPLETE in the flags. + * + * NOTE: vq_req is _not_ freed by this function. The VQ Host + * Reply buffer _is_ freed by this function. + */ +static int +send_pbl_messages(struct c2_dev *c2dev, __be32 stag_index, + unsigned long va, u32 pbl_depth, + struct c2_vq_req *vq_req, int pbl_type) +{ + u32 pbe_count; /* amt that fits in a PBL msg */ + u32 count; /* amt in this PBL MSG. */ + struct c2wr_nsmr_pbl_req *wr; /* PBL WR ptr */ + struct c2wr_nsmr_pbl_rep *reply; /* reply ptr */ + int err, pbl_virt, pbl_index, i; + + switch (pbl_type) { + case PBL_VIRT: + pbl_virt = 1; + break; + case PBL_PHYS: + pbl_virt = 0; + break; + default: + return -EINVAL; + break; + } + + pbe_count = (c2dev->req_vq.msg_size - + sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64); + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + return -ENOMEM; + } + c2_wr_set_id(wr, CCWR_NSMR_PBL); + + /* + * Only the last PBL message will generate a reply from the verbs, + * so we set the context to 0 indicating there is no kernel verbs + * handler blocked awaiting this reply. + */ + wr->hdr.context = 0; + wr->rnic_handle = c2dev->adapter_handle; + wr->stag_index = stag_index; /* already swapped */ + wr->flags = 0; + pbl_index = 0; + while (pbl_depth) { + count = min(pbe_count, pbl_depth); + wr->addrs_length = cpu_to_be32(count); + + /* + * If this is the last message, then reference the + * vq request struct cuz we're gonna wait for a reply. + * also make this PBL msg as the last one. + */ + if (count == pbl_depth) { + /* + * reference the request struct. dereferenced in the + * int handler. + */ + vq_req_get(c2dev, vq_req); + wr->flags = cpu_to_be32(MEM_PBL_COMPLETE); + + /* + * This is the last PBL message. + * Set the context to our VQ Request Object so we can + * wait for the reply. + */ + wr->hdr.context = (unsigned long) vq_req; + } + + /* + * If pbl_virt is set then va is a virtual address + * that describes a virtually contiguous memory + * allocation. The wr needs the start of each virtual page + * to be converted to the corresponding physical address + * of the page. If pbl_virt is not set then va is an array + * of physical addresses and there is no conversion to do. + * Just fill in the wr with what is in the array. + */ + for (i = 0; i < count; i++) { + if (pbl_virt) { + va += PAGE_SIZE; + } else { + wr->paddrs[i] = + cpu_to_be64(((u64 *)va)[pbl_index + i]); + } + } + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) wr); + if (err) { + if (count <= pbe_count) { + vq_req_put(c2dev, vq_req); + } + goto bail0; + } + pbl_depth -= count; + pbl_index += count; + } + + /* + * Now wait for the reply... + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail0; + } + + /* + * Process reply + */ + reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + err = c2_errno(reply); + + vq_repbuf_free(c2dev, reply); + bail0: + kfree(wr); + return err; +} + +#define C2_PBL_MAX_DEPTH 131072 +int +c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list, + int page_size, int pbl_depth, u32 length, + u32 offset, u64 *va, enum c2_acf acf, + struct c2_mr *mr) +{ + struct c2_vq_req *vq_req; + struct c2wr_nsmr_register_req *wr; + struct c2wr_nsmr_register_rep *reply; + u16 flags; + int i, pbe_count, count; + int err; + + if (!va || !length || !addr_list || !pbl_depth) + return -EINTR; + + /* + * Verify PBL depth is within rnic max + */ + if (pbl_depth > C2_PBL_MAX_DEPTH) { + return -EINTR; + } + + /* + * allocate verbs request object + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + err = -ENOMEM; + goto bail0; + } + + /* + * build the WR + */ + c2_wr_set_id(wr, CCWR_NSMR_REGISTER); + wr->hdr.context = (unsigned long) vq_req; + wr->rnic_handle = c2dev->adapter_handle; + + flags = (acf | MEM_VA_BASED | MEM_REMOTE); + + /* + * compute how many pbes can fit in the message + */ + pbe_count = (c2dev->req_vq.msg_size - + sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64); + + if (pbl_depth <= pbe_count) { + flags |= MEM_PBL_COMPLETE; + } + wr->flags = cpu_to_be16(flags); + wr->stag_key = 0; //stag_key; + wr->va = cpu_to_be64(*va); + wr->pd_id = mr->pd->pd_id; + wr->pbe_size = cpu_to_be32(page_size); + wr->length = cpu_to_be32(length); + wr->pbl_depth = cpu_to_be32(pbl_depth); + wr->fbo = cpu_to_be32(offset); + count = min(pbl_depth, pbe_count); + wr->addrs_length = cpu_to_be32(count); + + /* + * fill out the PBL for this message + */ + for (i = 0; i < count; i++) { + wr->paddrs[i] = cpu_to_be64(addr_list[i]); + } + + /* + * regerence the request struct + */ + vq_req_get(c2dev, vq_req); + + /* + * send the WR to the adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + /* + * wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail1; + } + + /* + * process reply + */ + reply = + (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail1; + } + if ((err = c2_errno(reply))) { + goto bail2; + } + //*p_pb_entries = be32_to_cpu(reply->pbl_depth); + mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index); + vq_repbuf_free(c2dev, reply); + + /* + * if there are still more PBEs we need to send them to + * the adapter and wait for a reply on the final one. + * reuse vq_req for this purpose. + */ + pbl_depth -= count; + if (pbl_depth) { + + vq_req->reply_msg = (unsigned long) NULL; + atomic_set(&vq_req->reply_ready, 0); + err = send_pbl_messages(c2dev, + cpu_to_be32(mr->ibmr.lkey), + (unsigned long) &addr_list[i], + pbl_depth, vq_req, PBL_PHYS); + if (err) { + goto bail1; + } + } + + vq_req_free(c2dev, vq_req); + kfree(wr); + + return err; + + bail2: + vq_repbuf_free(c2dev, reply); + bail1: + kfree(wr); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index) +{ + struct c2_vq_req *vq_req; /* verbs request object */ + struct c2wr_stag_dealloc_req wr; /* work request */ + struct c2wr_stag_dealloc_rep *reply; /* WR reply */ + int err; + + + /* + * allocate verbs request object + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + return -ENOMEM; + } + + /* + * Build the WR + */ + c2_wr_set_id(&wr, CCWR_STAG_DEALLOC); + wr.hdr.context = (u64) (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.stag_index = cpu_to_be32(stag_index); + + /* + * reference the request struct. dereferenced in the int handler. + */ + vq_req_get(c2dev, vq_req); + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + /* + * Wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail0; + } + + /* + * Process reply + */ + reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + err = c2_errno(reply); + + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} diff --git a/drivers/infiniband/hw/amso1100/c2_mq.c b/drivers/infiniband/hw/amso1100/c2_mq.c new file mode 100644 index 00000000..0cddc49b --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_mq.c @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "c2.h" +#include "c2_mq.h" + +void *c2_mq_alloc(struct c2_mq *q) +{ + BUG_ON(q->magic != C2_MQ_MAGIC); + BUG_ON(q->type != C2_MQ_ADAPTER_TARGET); + + if (c2_mq_full(q)) { + return NULL; + } else { +#ifdef DEBUG + struct c2wr_hdr *m = + (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size); +#ifdef CCMSGMAGIC + BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC)); + m->magic = cpu_to_be32(CCWR_MAGIC); +#endif + return m; +#else + return q->msg_pool.host + q->priv * q->msg_size; +#endif + } +} + +void c2_mq_produce(struct c2_mq *q) +{ + BUG_ON(q->magic != C2_MQ_MAGIC); + BUG_ON(q->type != C2_MQ_ADAPTER_TARGET); + + if (!c2_mq_full(q)) { + q->priv = (q->priv + 1) % q->q_size; + q->hint_count++; + /* Update peer's offset. */ + __raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared); + } +} + +void *c2_mq_consume(struct c2_mq *q) +{ + BUG_ON(q->magic != C2_MQ_MAGIC); + BUG_ON(q->type != C2_MQ_HOST_TARGET); + + if (c2_mq_empty(q)) { + return NULL; + } else { +#ifdef DEBUG + struct c2wr_hdr *m = (struct c2wr_hdr *) + (q->msg_pool.host + q->priv * q->msg_size); +#ifdef CCMSGMAGIC + BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC)); +#endif + return m; +#else + return q->msg_pool.host + q->priv * q->msg_size; +#endif + } +} + +void c2_mq_free(struct c2_mq *q) +{ + BUG_ON(q->magic != C2_MQ_MAGIC); + BUG_ON(q->type != C2_MQ_HOST_TARGET); + + if (!c2_mq_empty(q)) { + +#ifdef CCMSGMAGIC + { + struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *) + (q->msg_pool.adapter + q->priv * q->msg_size); + __raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic); + } +#endif + q->priv = (q->priv + 1) % q->q_size; + /* Update peer's offset. */ + __raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared); + } +} + + +void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count) +{ + BUG_ON(q->magic != C2_MQ_MAGIC); + BUG_ON(q->type != C2_MQ_ADAPTER_TARGET); + + while (wqe_count--) { + BUG_ON(c2_mq_empty(q)); + *q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size); + } +} + +#if 0 +u32 c2_mq_count(struct c2_mq *q) +{ + s32 count; + + if (q->type == C2_MQ_HOST_TARGET) + count = be16_to_cpu(*q->shared) - q->priv; + else + count = q->priv - be16_to_cpu(*q->shared); + + if (count < 0) + count += q->q_size; + + return (u32) count; +} +#endif /* 0 */ + +void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, + u8 __iomem *pool_start, u16 __iomem *peer, u32 type) +{ + BUG_ON(!q->shared); + + /* This code assumes the byte swapping has already been done! */ + q->index = index; + q->q_size = q_size; + q->msg_size = msg_size; + q->msg_pool.adapter = pool_start; + q->peer = (struct c2_mq_shared __iomem *) peer; + q->magic = C2_MQ_MAGIC; + q->type = type; + q->priv = 0; + q->hint_count = 0; + return; +} +void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, + u8 *pool_start, u16 __iomem *peer, u32 type) +{ + BUG_ON(!q->shared); + + /* This code assumes the byte swapping has already been done! */ + q->index = index; + q->q_size = q_size; + q->msg_size = msg_size; + q->msg_pool.host = pool_start; + q->peer = (struct c2_mq_shared __iomem *) peer; + q->magic = C2_MQ_MAGIC; + q->type = type; + q->priv = 0; + q->hint_count = 0; + return; +} diff --git a/drivers/infiniband/hw/amso1100/c2_mq.h b/drivers/infiniband/hw/amso1100/c2_mq.h new file mode 100644 index 00000000..fc1b9a7c --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_mq.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _C2_MQ_H_ +#define _C2_MQ_H_ +#include +#include +#include "c2_wr.h" + +enum c2_shared_regs { + + C2_SHARED_ARMED = 0x10, + C2_SHARED_NOTIFY = 0x18, + C2_SHARED_SHARED = 0x40, +}; + +struct c2_mq_shared { + u16 unused1; + u8 armed; + u8 notification_type; + u32 unused2; + u16 shared; + /* Pad to 64 bytes. */ + u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)]; +}; + +enum c2_mq_type { + C2_MQ_HOST_TARGET = 1, + C2_MQ_ADAPTER_TARGET = 2, +}; + +/* + * c2_mq_t is for kernel-mode MQs like the VQs Cand the AEQ. + * c2_user_mq_t (which is the same format) is for user-mode MQs... + */ +#define C2_MQ_MAGIC 0x4d512020 /* 'MQ ' */ +struct c2_mq { + u32 magic; + union { + u8 *host; + u8 __iomem *adapter; + } msg_pool; + dma_addr_t host_dma; + DEFINE_DMA_UNMAP_ADDR(mapping); + u16 hint_count; + u16 priv; + struct c2_mq_shared __iomem *peer; + __be16 *shared; + dma_addr_t shared_dma; + u32 q_size; + u32 msg_size; + u32 index; + enum c2_mq_type type; +}; + +static __inline__ int c2_mq_empty(struct c2_mq *q) +{ + return q->priv == be16_to_cpu(*q->shared); +} + +static __inline__ int c2_mq_full(struct c2_mq *q) +{ + return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size; +} + +extern void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count); +extern void *c2_mq_alloc(struct c2_mq *q); +extern void c2_mq_produce(struct c2_mq *q); +extern void *c2_mq_consume(struct c2_mq *q); +extern void c2_mq_free(struct c2_mq *q); +extern void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, + u8 __iomem *pool_start, u16 __iomem *peer, u32 type); +extern void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, + u8 *pool_start, u16 __iomem *peer, u32 type); + +#endif /* _C2_MQ_H_ */ diff --git a/drivers/infiniband/hw/amso1100/c2_pd.c b/drivers/infiniband/hw/amso1100/c2_pd.c new file mode 100644 index 00000000..161f2a28 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_pd.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "c2.h" +#include "c2_provider.h" + +int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd) +{ + u32 obj; + int ret = 0; + + spin_lock(&c2dev->pd_table.lock); + obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max, + c2dev->pd_table.last); + if (obj >= c2dev->pd_table.max) + obj = find_first_zero_bit(c2dev->pd_table.table, + c2dev->pd_table.max); + if (obj < c2dev->pd_table.max) { + pd->pd_id = obj; + __set_bit(obj, c2dev->pd_table.table); + c2dev->pd_table.last = obj+1; + if (c2dev->pd_table.last >= c2dev->pd_table.max) + c2dev->pd_table.last = 0; + } else + ret = -ENOMEM; + spin_unlock(&c2dev->pd_table.lock); + return ret; +} + +void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd) +{ + spin_lock(&c2dev->pd_table.lock); + __clear_bit(pd->pd_id, c2dev->pd_table.table); + spin_unlock(&c2dev->pd_table.lock); +} + +int __devinit c2_init_pd_table(struct c2_dev *c2dev) +{ + + c2dev->pd_table.last = 0; + c2dev->pd_table.max = c2dev->props.max_pd; + spin_lock_init(&c2dev->pd_table.lock); + c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) * + sizeof(long), GFP_KERNEL); + if (!c2dev->pd_table.table) + return -ENOMEM; + bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd); + return 0; +} + +void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev) +{ + kfree(c2dev->pd_table.table); +} diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c new file mode 100644 index 00000000..07eb3a80 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -0,0 +1,887 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include "c2.h" +#include "c2_provider.h" +#include "c2_user.h" + +static int c2_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct c2_dev *c2dev = to_c2dev(ibdev); + + pr_debug("%s:%u\n", __func__, __LINE__); + + *props = c2dev->props; + return 0; +} + +static int c2_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + pr_debug("%s:%u\n", __func__, __LINE__); + + props->max_mtu = IB_MTU_4096; + props->lid = 0; + props->lmc = 0; + props->sm_lid = 0; + props->sm_sl = 0; + props->state = IB_PORT_ACTIVE; + props->phys_state = 0; + props->port_cap_flags = + IB_PORT_CM_SUP | + IB_PORT_REINIT_SUP | + IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; + props->gid_tbl_len = 1; + props->pkey_tbl_len = 1; + props->qkey_viol_cntr = 0; + props->active_width = 1; + props->active_speed = IB_SPEED_SDR; + + return 0; +} + +static int c2_query_pkey(struct ib_device *ibdev, + u8 port, u16 index, u16 * pkey) +{ + pr_debug("%s:%u\n", __func__, __LINE__); + *pkey = 0; + return 0; +} + +static int c2_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + struct c2_dev *c2dev = to_c2dev(ibdev); + + pr_debug("%s:%u\n", __func__, __LINE__); + memset(&(gid->raw[0]), 0, sizeof(gid->raw)); + memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6); + + return 0; +} + +/* Allocate the user context data structure. This keeps track + * of all objects associated with a particular user-mode client. + */ +static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct c2_ucontext *context; + + pr_debug("%s:%u\n", __func__, __LINE__); + context = kmalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + return &context->ibucontext; +} + +static int c2_dealloc_ucontext(struct ib_ucontext *context) +{ + pr_debug("%s:%u\n", __func__, __LINE__); + kfree(context); + return 0; +} + +static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + pr_debug("%s:%u\n", __func__, __LINE__); + return -ENOSYS; +} + +static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct c2_pd *pd; + int err; + + pr_debug("%s:%u\n", __func__, __LINE__); + + pd = kmalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + err = c2_pd_alloc(to_c2dev(ibdev), !context, pd); + if (err) { + kfree(pd); + return ERR_PTR(err); + } + + if (context) { + if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) { + c2_pd_free(to_c2dev(ibdev), pd); + kfree(pd); + return ERR_PTR(-EFAULT); + } + } + + return &pd->ibpd; +} + +static int c2_dealloc_pd(struct ib_pd *pd) +{ + pr_debug("%s:%u\n", __func__, __LINE__); + c2_pd_free(to_c2dev(pd->device), to_c2pd(pd)); + kfree(pd); + + return 0; +} + +static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + pr_debug("%s:%u\n", __func__, __LINE__); + return ERR_PTR(-ENOSYS); +} + +static int c2_ah_destroy(struct ib_ah *ah) +{ + pr_debug("%s:%u\n", __func__, __LINE__); + return -ENOSYS; +} + +static void c2_add_ref(struct ib_qp *ibqp) +{ + struct c2_qp *qp; + BUG_ON(!ibqp); + qp = to_c2qp(ibqp); + atomic_inc(&qp->refcount); +} + +static void c2_rem_ref(struct ib_qp *ibqp) +{ + struct c2_qp *qp; + BUG_ON(!ibqp); + qp = to_c2qp(ibqp); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); +} + +struct ib_qp *c2_get_qp(struct ib_device *device, int qpn) +{ + struct c2_dev* c2dev = to_c2dev(device); + struct c2_qp *qp; + + qp = c2_find_qpn(c2dev, qpn); + pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n", + __func__, qp, qpn, device, + (qp?atomic_read(&qp->refcount):0)); + + return (qp?&qp->ibqp:NULL); +} + +static struct ib_qp *c2_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct c2_qp *qp; + int err; + + pr_debug("%s:%u\n", __func__, __LINE__); + + if (init_attr->create_flags) + return ERR_PTR(-EINVAL); + + switch (init_attr->qp_type) { + case IB_QPT_RC: + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + pr_debug("%s: Unable to allocate QP\n", __func__); + return ERR_PTR(-ENOMEM); + } + spin_lock_init(&qp->lock); + if (pd->uobject) { + /* userspace specific */ + } + + err = c2_alloc_qp(to_c2dev(pd->device), + to_c2pd(pd), init_attr, qp); + + if (err && pd->uobject) { + /* userspace specific */ + } + + break; + default: + pr_debug("%s: Invalid QP type: %d\n", __func__, + init_attr->qp_type); + return ERR_PTR(-EINVAL); + } + + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + return &qp->ibqp; +} + +static int c2_destroy_qp(struct ib_qp *ib_qp) +{ + struct c2_qp *qp = to_c2qp(ib_qp); + + pr_debug("%s:%u qp=%p,qp->state=%d\n", + __func__, __LINE__, ib_qp, qp->state); + c2_free_qp(to_c2dev(ib_qp->device), qp); + kfree(qp); + return 0; +} + +static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries, int vector, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct c2_cq *cq; + int err; + + cq = kmalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) { + pr_debug("%s: Unable to allocate CQ\n", __func__); + return ERR_PTR(-ENOMEM); + } + + err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq); + if (err) { + pr_debug("%s: error initializing CQ\n", __func__); + kfree(cq); + return ERR_PTR(err); + } + + return &cq->ibcq; +} + +static int c2_destroy_cq(struct ib_cq *ib_cq) +{ + struct c2_cq *cq = to_c2cq(ib_cq); + + pr_debug("%s:%u\n", __func__, __LINE__); + + c2_free_cq(to_c2dev(ib_cq->device), cq); + kfree(cq); + + return 0; +} + +static inline u32 c2_convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) | + C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND; +} + +static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, int acc, u64 * iova_start) +{ + struct c2_mr *mr; + u64 *page_list; + u32 total_len; + int err, i, j, k, page_shift, pbl_depth; + + pbl_depth = 0; + total_len = 0; + + page_shift = PAGE_SHIFT; + /* + * If there is only 1 buffer we assume this could + * be a map of all phy mem...use a 32k page_shift. + */ + if (num_phys_buf == 1) + page_shift += 3; + + for (i = 0; i < num_phys_buf; i++) { + + if (buffer_list[i].addr & ~PAGE_MASK) { + pr_debug("Unaligned Memory Buffer: 0x%x\n", + (unsigned int) buffer_list[i].addr); + return ERR_PTR(-EINVAL); + } + + if (!buffer_list[i].size) { + pr_debug("Invalid Buffer Size\n"); + return ERR_PTR(-EINVAL); + } + + total_len += buffer_list[i].size; + pbl_depth += ALIGN(buffer_list[i].size, + (1 << page_shift)) >> page_shift; + } + + page_list = vmalloc(sizeof(u64) * pbl_depth); + if (!page_list) { + pr_debug("couldn't vmalloc page_list of size %zd\n", + (sizeof(u64) * pbl_depth)); + return ERR_PTR(-ENOMEM); + } + + for (i = 0, j = 0; i < num_phys_buf; i++) { + + int naddrs; + + naddrs = ALIGN(buffer_list[i].size, + (1 << page_shift)) >> page_shift; + for (k = 0; k < naddrs; k++) + page_list[j++] = (buffer_list[i].addr + + (k << page_shift)); + } + + mr = kmalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + vfree(page_list); + return ERR_PTR(-ENOMEM); + } + + mr->pd = to_c2pd(ib_pd); + mr->umem = NULL; + pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, " + "*iova_start %llx, first pa %llx, last pa %llx\n", + __func__, page_shift, pbl_depth, total_len, + (unsigned long long) *iova_start, + (unsigned long long) page_list[0], + (unsigned long long) page_list[pbl_depth-1]); + err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list, + (1 << page_shift), pbl_depth, + total_len, 0, iova_start, + c2_convert_access(acc), mr); + vfree(page_list); + if (err) { + kfree(mr); + return ERR_PTR(err); + } + + return &mr->ibmr; +} + +static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct ib_phys_buf bl; + u64 kva = 0; + + pr_debug("%s:%u\n", __func__, __LINE__); + + /* AMSO1100 limit */ + bl.size = 0xffffffff; + bl.addr = 0; + return c2_reg_phys_mr(pd, &bl, 1, acc, &kva); +} + +static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int acc, struct ib_udata *udata) +{ + u64 *pages; + u64 kva = 0; + int shift, n, len; + int i, j, k; + int err = 0; + struct ib_umem_chunk *chunk; + struct c2_pd *c2pd = to_c2pd(pd); + struct c2_mr *c2mr; + + pr_debug("%s:%u\n", __func__, __LINE__); + + c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL); + if (!c2mr) + return ERR_PTR(-ENOMEM); + c2mr->pd = c2pd; + + c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); + if (IS_ERR(c2mr->umem)) { + err = PTR_ERR(c2mr->umem); + kfree(c2mr); + return ERR_PTR(err); + } + + shift = ffs(c2mr->umem->page_size) - 1; + + n = 0; + list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) + n += chunk->nents; + + pages = kmalloc(n * sizeof(u64), GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + goto err; + } + + i = 0; + list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) { + for (j = 0; j < chunk->nmap; ++j) { + len = sg_dma_len(&chunk->page_list[j]) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = + sg_dma_address(&chunk->page_list[j]) + + (c2mr->umem->page_size * k); + } + } + } + + kva = virt; + err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), + pages, + c2mr->umem->page_size, + i, + length, + c2mr->umem->offset, + &kva, + c2_convert_access(acc), + c2mr); + kfree(pages); + if (err) + goto err; + return &c2mr->ibmr; + +err: + ib_umem_release(c2mr->umem); + kfree(c2mr); + return ERR_PTR(err); +} + +static int c2_dereg_mr(struct ib_mr *ib_mr) +{ + struct c2_mr *mr = to_c2mr(ib_mr); + int err; + + pr_debug("%s:%u\n", __func__, __LINE__); + + err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey); + if (err) + pr_debug("c2_stag_dealloc failed: %d\n", err); + else { + if (mr->umem) + ib_umem_release(mr->umem); + kfree(mr); + } + + return err; +} + +static ssize_t show_rev(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev); + pr_debug("%s:%u\n", __func__, __LINE__); + return sprintf(buf, "%x\n", c2dev->props.hw_ver); +} + +static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev); + pr_debug("%s:%u\n", __func__, __LINE__); + return sprintf(buf, "%x.%x.%x\n", + (int) (c2dev->props.fw_ver >> 32), + (int) (c2dev->props.fw_ver >> 16) & 0xffff, + (int) (c2dev->props.fw_ver & 0xffff)); +} + +static ssize_t show_hca(struct device *dev, struct device_attribute *attr, + char *buf) +{ + pr_debug("%s:%u\n", __func__, __LINE__); + return sprintf(buf, "AMSO1100\n"); +} + +static ssize_t show_board(struct device *dev, struct device_attribute *attr, + char *buf) +{ + pr_debug("%s:%u\n", __func__, __LINE__); + return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID"); +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); + +static struct device_attribute *c2_dev_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_fw_ver, + &dev_attr_hca_type, + &dev_attr_board_id +}; + +static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + int err; + + err = + c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr, + attr_mask); + + return err; +} + +static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + pr_debug("%s:%u\n", __func__, __LINE__); + return -ENOSYS; +} + +static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + pr_debug("%s:%u\n", __func__, __LINE__); + return -ENOSYS; +} + +static int c2_process_mad(struct ib_device *ibdev, + int mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + pr_debug("%s:%u\n", __func__, __LINE__); + return -ENOSYS; +} + +static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + pr_debug("%s:%u\n", __func__, __LINE__); + + /* Request a connection */ + return c2_llp_connect(cm_id, iw_param); +} + +static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + pr_debug("%s:%u\n", __func__, __LINE__); + + /* Accept the new connection */ + return c2_llp_accept(cm_id, iw_param); +} + +static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ + int err; + + pr_debug("%s:%u\n", __func__, __LINE__); + + err = c2_llp_reject(cm_id, pdata, pdata_len); + return err; +} + +static int c2_service_create(struct iw_cm_id *cm_id, int backlog) +{ + int err; + + pr_debug("%s:%u\n", __func__, __LINE__); + err = c2_llp_service_create(cm_id, backlog); + pr_debug("%s:%u err=%d\n", + __func__, __LINE__, + err); + return err; +} + +static int c2_service_destroy(struct iw_cm_id *cm_id) +{ + int err; + pr_debug("%s:%u\n", __func__, __LINE__); + + err = c2_llp_service_destroy(cm_id); + + return err; +} + +static int c2_pseudo_up(struct net_device *netdev) +{ + struct in_device *ind; + struct c2_dev *c2dev = netdev->ml_priv; + + ind = in_dev_get(netdev); + if (!ind) + return 0; + + pr_debug("adding...\n"); + for_ifa(ind) { +#ifdef DEBUG + u8 *ip = (u8 *) & ifa->ifa_address; + + pr_debug("%s: %d.%d.%d.%d\n", + ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]); +#endif + c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask); + } + endfor_ifa(ind); + in_dev_put(ind); + + return 0; +} + +static int c2_pseudo_down(struct net_device *netdev) +{ + struct in_device *ind; + struct c2_dev *c2dev = netdev->ml_priv; + + ind = in_dev_get(netdev); + if (!ind) + return 0; + + pr_debug("deleting...\n"); + for_ifa(ind) { +#ifdef DEBUG + u8 *ip = (u8 *) & ifa->ifa_address; + + pr_debug("%s: %d.%d.%d.%d\n", + ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]); +#endif + c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask); + } + endfor_ifa(ind); + in_dev_put(ind); + + return 0; +} + +static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +{ + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu) +{ + if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU) + return -EINVAL; + + netdev->mtu = new_mtu; + + /* TODO: Tell rnic about new rmda interface mtu */ + return 0; +} + +static const struct net_device_ops c2_pseudo_netdev_ops = { + .ndo_open = c2_pseudo_up, + .ndo_stop = c2_pseudo_down, + .ndo_start_xmit = c2_pseudo_xmit_frame, + .ndo_change_mtu = c2_pseudo_change_mtu, + .ndo_validate_addr = eth_validate_addr, +}; + +static void setup(struct net_device *netdev) +{ + netdev->netdev_ops = &c2_pseudo_netdev_ops; + + netdev->watchdog_timeo = 0; + netdev->type = ARPHRD_ETHER; + netdev->mtu = 1500; + netdev->hard_header_len = ETH_HLEN; + netdev->addr_len = ETH_ALEN; + netdev->tx_queue_len = 0; + netdev->flags |= IFF_NOARP; +} + +static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev) +{ + char name[IFNAMSIZ]; + struct net_device *netdev; + + /* change ethxxx to iwxxx */ + strcpy(name, "iw"); + strcat(name, &c2dev->netdev->name[3]); + netdev = alloc_netdev(0, name, setup); + if (!netdev) { + printk(KERN_ERR PFX "%s - etherdev alloc failed", + __func__); + return NULL; + } + + netdev->ml_priv = c2dev; + + SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev); + + memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6); + + /* Print out the MAC address */ + pr_debug("%s: MAC %pM\n", netdev->name, netdev->dev_addr); + +#if 0 + /* Disable network packets */ + netif_stop_queue(netdev); +#endif + return netdev; +} + +int c2_register_device(struct c2_dev *dev) +{ + int ret = -ENOMEM; + int i; + + /* Register pseudo network device */ + dev->pseudo_netdev = c2_pseudo_netdev_init(dev); + if (!dev->pseudo_netdev) + goto out; + + ret = register_netdev(dev->pseudo_netdev); + if (ret) + goto out_free_netdev; + + pr_debug("%s:%u\n", __func__, __LINE__); + strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX); + dev->ibdev.owner = THIS_MODULE; + dev->ibdev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV); + + dev->ibdev.node_type = RDMA_NODE_RNIC; + memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); + memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6); + dev->ibdev.phys_port_cnt = 1; + dev->ibdev.num_comp_vectors = 1; + dev->ibdev.dma_device = &dev->pcidev->dev; + dev->ibdev.query_device = c2_query_device; + dev->ibdev.query_port = c2_query_port; + dev->ibdev.query_pkey = c2_query_pkey; + dev->ibdev.query_gid = c2_query_gid; + dev->ibdev.alloc_ucontext = c2_alloc_ucontext; + dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext; + dev->ibdev.mmap = c2_mmap_uar; + dev->ibdev.alloc_pd = c2_alloc_pd; + dev->ibdev.dealloc_pd = c2_dealloc_pd; + dev->ibdev.create_ah = c2_ah_create; + dev->ibdev.destroy_ah = c2_ah_destroy; + dev->ibdev.create_qp = c2_create_qp; + dev->ibdev.modify_qp = c2_modify_qp; + dev->ibdev.destroy_qp = c2_destroy_qp; + dev->ibdev.create_cq = c2_create_cq; + dev->ibdev.destroy_cq = c2_destroy_cq; + dev->ibdev.poll_cq = c2_poll_cq; + dev->ibdev.get_dma_mr = c2_get_dma_mr; + dev->ibdev.reg_phys_mr = c2_reg_phys_mr; + dev->ibdev.reg_user_mr = c2_reg_user_mr; + dev->ibdev.dereg_mr = c2_dereg_mr; + + dev->ibdev.alloc_fmr = NULL; + dev->ibdev.unmap_fmr = NULL; + dev->ibdev.dealloc_fmr = NULL; + dev->ibdev.map_phys_fmr = NULL; + + dev->ibdev.attach_mcast = c2_multicast_attach; + dev->ibdev.detach_mcast = c2_multicast_detach; + dev->ibdev.process_mad = c2_process_mad; + + dev->ibdev.req_notify_cq = c2_arm_cq; + dev->ibdev.post_send = c2_post_send; + dev->ibdev.post_recv = c2_post_receive; + + dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL); + if (dev->ibdev.iwcm == NULL) { + ret = -ENOMEM; + goto out_unregister_netdev; + } + dev->ibdev.iwcm->add_ref = c2_add_ref; + dev->ibdev.iwcm->rem_ref = c2_rem_ref; + dev->ibdev.iwcm->get_qp = c2_get_qp; + dev->ibdev.iwcm->connect = c2_connect; + dev->ibdev.iwcm->accept = c2_accept; + dev->ibdev.iwcm->reject = c2_reject; + dev->ibdev.iwcm->create_listen = c2_service_create; + dev->ibdev.iwcm->destroy_listen = c2_service_destroy; + + ret = ib_register_device(&dev->ibdev, NULL); + if (ret) + goto out_free_iwcm; + + for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) { + ret = device_create_file(&dev->ibdev.dev, + c2_dev_attributes[i]); + if (ret) + goto out_unregister_ibdev; + } + goto out; + +out_unregister_ibdev: + ib_unregister_device(&dev->ibdev); +out_free_iwcm: + kfree(dev->ibdev.iwcm); +out_unregister_netdev: + unregister_netdev(dev->pseudo_netdev); +out_free_netdev: + free_netdev(dev->pseudo_netdev); +out: + pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret); + return ret; +} + +void c2_unregister_device(struct c2_dev *dev) +{ + pr_debug("%s:%u\n", __func__, __LINE__); + unregister_netdev(dev->pseudo_netdev); + free_netdev(dev->pseudo_netdev); + ib_unregister_device(&dev->ibdev); +} diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h new file mode 100644 index 00000000..bf189987 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_provider.h @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef C2_PROVIDER_H +#define C2_PROVIDER_H +#include + +#include +#include + +#include "c2_mq.h" +#include + +#define C2_MPT_FLAG_ATOMIC (1 << 14) +#define C2_MPT_FLAG_REMOTE_WRITE (1 << 13) +#define C2_MPT_FLAG_REMOTE_READ (1 << 12) +#define C2_MPT_FLAG_LOCAL_WRITE (1 << 11) +#define C2_MPT_FLAG_LOCAL_READ (1 << 10) + +struct c2_buf_list { + void *buf; + DEFINE_DMA_UNMAP_ADDR(mapping); +}; + + +/* The user context keeps track of objects allocated for a + * particular user-mode client. */ +struct c2_ucontext { + struct ib_ucontext ibucontext; +}; + +struct c2_mtt; + +/* All objects associated with a PD are kept in the + * associated user context if present. + */ +struct c2_pd { + struct ib_pd ibpd; + u32 pd_id; +}; + +struct c2_mr { + struct ib_mr ibmr; + struct c2_pd *pd; + struct ib_umem *umem; +}; + +struct c2_av; + +enum c2_ah_type { + C2_AH_ON_HCA, + C2_AH_PCI_POOL, + C2_AH_KMALLOC +}; + +struct c2_ah { + struct ib_ah ibah; +}; + +struct c2_cq { + struct ib_cq ibcq; + spinlock_t lock; + atomic_t refcount; + int cqn; + int is_kernel; + wait_queue_head_t wait; + + u32 adapter_handle; + struct c2_mq mq; +}; + +struct c2_wq { + spinlock_t lock; +}; +struct iw_cm_id; +struct c2_qp { + struct ib_qp ibqp; + struct iw_cm_id *cm_id; + spinlock_t lock; + atomic_t refcount; + wait_queue_head_t wait; + int qpn; + + u32 adapter_handle; + u32 send_sgl_depth; + u32 recv_sgl_depth; + u32 rdma_write_sgl_depth; + u8 state; + + struct c2_mq sq_mq; + struct c2_mq rq_mq; +}; + +struct c2_cr_query_attrs { + u32 local_addr; + u32 remote_addr; + u16 local_port; + u16 remote_port; +}; + +static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct c2_pd, ibpd); +} + +static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct c2_ucontext, ibucontext); +} + +static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct c2_mr, ibmr); +} + + +static inline struct c2_ah *to_c2ah(struct ib_ah *ibah) +{ + return container_of(ibah, struct c2_ah, ibah); +} + +static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct c2_cq, ibcq); +} + +static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct c2_qp, ibqp); +} + +static inline int is_rnic_addr(struct net_device *netdev, u32 addr) +{ + struct in_device *ind; + int ret = 0; + + ind = in_dev_get(netdev); + if (!ind) + return 0; + + for_ifa(ind) { + if (ifa->ifa_address == addr) { + ret = 1; + break; + } + } + endfor_ifa(ind); + in_dev_put(ind); + return ret; +} +#endif /* C2_PROVIDER_H */ diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c new file mode 100644 index 00000000..0d7b6f23 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_qp.c @@ -0,0 +1,1022 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include + +#include "c2.h" +#include "c2_vq.h" +#include "c2_status.h" + +#define C2_MAX_ORD_PER_QP 128 +#define C2_MAX_IRD_PER_QP 128 + +#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count) +#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16) +#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF) + +#define NO_SUPPORT -1 +static const u8 c2_opcode[] = { + [IB_WR_SEND] = C2_WR_TYPE_SEND, + [IB_WR_SEND_WITH_IMM] = NO_SUPPORT, + [IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT, + [IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT, + [IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT, +}; + +static int to_c2_state(enum ib_qp_state ib_state) +{ + switch (ib_state) { + case IB_QPS_RESET: + return C2_QP_STATE_IDLE; + case IB_QPS_RTS: + return C2_QP_STATE_RTS; + case IB_QPS_SQD: + return C2_QP_STATE_CLOSING; + case IB_QPS_SQE: + return C2_QP_STATE_CLOSING; + case IB_QPS_ERR: + return C2_QP_STATE_ERROR; + default: + return -1; + } +} + +static int to_ib_state(enum c2_qp_state c2_state) +{ + switch (c2_state) { + case C2_QP_STATE_IDLE: + return IB_QPS_RESET; + case C2_QP_STATE_CONNECTING: + return IB_QPS_RTR; + case C2_QP_STATE_RTS: + return IB_QPS_RTS; + case C2_QP_STATE_CLOSING: + return IB_QPS_SQD; + case C2_QP_STATE_ERROR: + return IB_QPS_ERR; + case C2_QP_STATE_TERMINATE: + return IB_QPS_SQE; + default: + return -1; + } +} + +static const char *to_ib_state_str(int ib_state) +{ + static const char *state_str[] = { + "IB_QPS_RESET", + "IB_QPS_INIT", + "IB_QPS_RTR", + "IB_QPS_RTS", + "IB_QPS_SQD", + "IB_QPS_SQE", + "IB_QPS_ERR" + }; + if (ib_state < IB_QPS_RESET || + ib_state > IB_QPS_ERR) + return ""; + + ib_state -= IB_QPS_RESET; + return state_str[ib_state]; +} + +void c2_set_qp_state(struct c2_qp *qp, int c2_state) +{ + int new_state = to_ib_state(c2_state); + + pr_debug("%s: qp[%p] state modify %s --> %s\n", + __func__, + qp, + to_ib_state_str(qp->state), + to_ib_state_str(new_state)); + qp->state = new_state; +} + +#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF + +int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp, + struct ib_qp_attr *attr, int attr_mask) +{ + struct c2wr_qp_modify_req wr; + struct c2wr_qp_modify_rep *reply; + struct c2_vq_req *vq_req; + unsigned long flags; + u8 next_state; + int err; + + pr_debug("%s:%d qp=%p, %s --> %s\n", + __func__, __LINE__, + qp, + to_ib_state_str(qp->state), + to_ib_state_str(attr->qp_state)); + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + c2_wr_set_id(&wr, CCWR_QP_MODIFY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.qp_handle = qp->adapter_handle; + wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + + if (attr_mask & IB_QP_STATE) { + /* Ensure the state is valid */ + if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) { + err = -EINVAL; + goto bail0; + } + + wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state)); + + if (attr->qp_state == IB_QPS_ERR) { + spin_lock_irqsave(&qp->lock, flags); + if (qp->cm_id && qp->state == IB_QPS_RTS) { + pr_debug("Generating CLOSE event for QP-->ERR, " + "qp=%p, cm_id=%p\n",qp,qp->cm_id); + /* Generate an CLOSE event */ + vq_req->cm_id = qp->cm_id; + vq_req->event = IW_CM_EVENT_CLOSE; + } + spin_unlock_irqrestore(&qp->lock, flags); + } + next_state = attr->qp_state; + + } else if (attr_mask & IB_QP_CUR_STATE) { + + if (attr->cur_qp_state != IB_QPS_RTR && + attr->cur_qp_state != IB_QPS_RTS && + attr->cur_qp_state != IB_QPS_SQD && + attr->cur_qp_state != IB_QPS_SQE) { + err = -EINVAL; + goto bail0; + } else + wr.next_qp_state = + cpu_to_be32(to_c2_state(attr->cur_qp_state)); + + next_state = attr->cur_qp_state; + + } else { + err = 0; + goto bail0; + } + + /* reference the request struct */ + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail0; + + reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + err = c2_errno(reply); + if (!err) + qp->state = next_state; +#ifdef DEBUG + else + pr_debug("%s: c2_errno=%d\n", __func__, err); +#endif + /* + * If we're going to error and generating the event here, then + * we need to remove the reference because there will be no + * close event generated by the adapter + */ + spin_lock_irqsave(&qp->lock, flags); + if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) { + qp->cm_id->rem_ref(qp->cm_id); + qp->cm_id = NULL; + } + spin_unlock_irqrestore(&qp->lock, flags); + + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + + pr_debug("%s:%d qp=%p, cur_state=%s\n", + __func__, __LINE__, + qp, + to_ib_state_str(qp->state)); + return err; +} + +int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp, + int ord, int ird) +{ + struct c2wr_qp_modify_req wr; + struct c2wr_qp_modify_rep *reply; + struct c2_vq_req *vq_req; + int err; + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + c2_wr_set_id(&wr, CCWR_QP_MODIFY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.qp_handle = qp->adapter_handle; + wr.ord = cpu_to_be32(ord); + wr.ird = cpu_to_be32(ird); + wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + + /* reference the request struct */ + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail0; + + reply = (struct c2wr_qp_modify_rep *) (unsigned long) + vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + err = c2_errno(reply); + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp) +{ + struct c2_vq_req *vq_req; + struct c2wr_qp_destroy_req wr; + struct c2wr_qp_destroy_rep *reply; + unsigned long flags; + int err; + + /* + * Allocate a verb request message + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + return -ENOMEM; + } + + /* + * Initialize the WR + */ + c2_wr_set_id(&wr, CCWR_QP_DESTROY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.qp_handle = qp->adapter_handle; + + /* + * reference the request struct. dereferenced in the int handler. + */ + vq_req_get(c2dev, vq_req); + + spin_lock_irqsave(&qp->lock, flags); + if (qp->cm_id && qp->state == IB_QPS_RTS) { + pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, " + "qp=%p, cm_id=%p\n",qp,qp->cm_id); + /* Generate an CLOSE event */ + vq_req->qp = qp; + vq_req->cm_id = qp->cm_id; + vq_req->event = IW_CM_EVENT_CLOSE; + } + spin_unlock_irqrestore(&qp->lock, flags); + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + /* + * Wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail0; + } + + /* + * Process reply + */ + reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + spin_lock_irqsave(&qp->lock, flags); + if (qp->cm_id) { + qp->cm_id->rem_ref(qp->cm_id); + qp->cm_id = NULL; + } + spin_unlock_irqrestore(&qp->lock, flags); + + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp) +{ + int ret; + + do { + spin_lock_irq(&c2dev->qp_table.lock); + ret = idr_get_new_above(&c2dev->qp_table.idr, qp, + c2dev->qp_table.last++, &qp->qpn); + spin_unlock_irq(&c2dev->qp_table.lock); + } while ((ret == -EAGAIN) && + idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL)); + return ret; +} + +static void c2_free_qpn(struct c2_dev *c2dev, int qpn) +{ + spin_lock_irq(&c2dev->qp_table.lock); + idr_remove(&c2dev->qp_table.idr, qpn); + spin_unlock_irq(&c2dev->qp_table.lock); +} + +struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn) +{ + unsigned long flags; + struct c2_qp *qp; + + spin_lock_irqsave(&c2dev->qp_table.lock, flags); + qp = idr_find(&c2dev->qp_table.idr, qpn); + spin_unlock_irqrestore(&c2dev->qp_table.lock, flags); + return qp; +} + +int c2_alloc_qp(struct c2_dev *c2dev, + struct c2_pd *pd, + struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp) +{ + struct c2wr_qp_create_req wr; + struct c2wr_qp_create_rep *reply; + struct c2_vq_req *vq_req; + struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq); + struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq); + unsigned long peer_pa; + u32 q_size, msg_size, mmap_size; + void __iomem *mmap; + int err; + + err = c2_alloc_qpn(c2dev, qp); + if (err) + return err; + qp->ibqp.qp_num = qp->qpn; + qp->ibqp.qp_type = IB_QPT_RC; + + /* Allocate the SQ and RQ shared pointers */ + qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &qp->sq_mq.shared_dma, GFP_KERNEL); + if (!qp->sq_mq.shared) { + err = -ENOMEM; + goto bail0; + } + + qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &qp->rq_mq.shared_dma, GFP_KERNEL); + if (!qp->rq_mq.shared) { + err = -ENOMEM; + goto bail1; + } + + /* Allocate the verbs request */ + vq_req = vq_req_alloc(c2dev); + if (vq_req == NULL) { + err = -ENOMEM; + goto bail2; + } + + /* Initialize the work request */ + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_QP_CREATE); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.sq_cq_handle = send_cq->adapter_handle; + wr.rq_cq_handle = recv_cq->adapter_handle; + wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1); + wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1); + wr.srq_handle = 0; + wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND | + QP_ZERO_STAG | QP_RDMA_READ_RESPONSE); + wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge); + wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge); + wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge); + wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma); + wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma); + wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP); + wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP); + wr.pd_id = pd->pd_id; + wr.user_context = (unsigned long) qp; + + vq_req_get(c2dev, vq_req); + + /* Send the WR to the adapter */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail3; + } + + /* Wait for the verb reply */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail3; + } + + /* Process the reply */ + reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail3; + } + + if ((err = c2_wr_get_result(reply)) != 0) { + goto bail4; + } + + /* Fill in the kernel QP struct */ + atomic_set(&qp->refcount, 1); + qp->adapter_handle = reply->qp_handle; + qp->state = IB_QPS_RESET; + qp->send_sgl_depth = qp_attrs->cap.max_send_sge; + qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge; + qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge; + init_waitqueue_head(&qp->wait); + + /* Initialize the SQ MQ */ + q_size = be32_to_cpu(reply->sq_depth); + msg_size = be32_to_cpu(reply->sq_msg_size); + peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start); + mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size); + mmap = ioremap_nocache(peer_pa, mmap_size); + if (!mmap) { + err = -ENOMEM; + goto bail5; + } + + c2_mq_req_init(&qp->sq_mq, + be32_to_cpu(reply->sq_mq_index), + q_size, + msg_size, + mmap + sizeof(struct c2_mq_shared), /* pool start */ + mmap, /* peer */ + C2_MQ_ADAPTER_TARGET); + + /* Initialize the RQ mq */ + q_size = be32_to_cpu(reply->rq_depth); + msg_size = be32_to_cpu(reply->rq_msg_size); + peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start); + mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size); + mmap = ioremap_nocache(peer_pa, mmap_size); + if (!mmap) { + err = -ENOMEM; + goto bail6; + } + + c2_mq_req_init(&qp->rq_mq, + be32_to_cpu(reply->rq_mq_index), + q_size, + msg_size, + mmap + sizeof(struct c2_mq_shared), /* pool start */ + mmap, /* peer */ + C2_MQ_ADAPTER_TARGET); + + vq_repbuf_free(c2dev, reply); + vq_req_free(c2dev, vq_req); + + return 0; + + bail6: + iounmap(qp->sq_mq.peer); + bail5: + destroy_qp(c2dev, qp); + bail4: + vq_repbuf_free(c2dev, reply); + bail3: + vq_req_free(c2dev, vq_req); + bail2: + c2_free_mqsp(qp->rq_mq.shared); + bail1: + c2_free_mqsp(qp->sq_mq.shared); + bail0: + c2_free_qpn(c2dev, qp->qpn); + return err; +} + +static inline void c2_lock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq) +{ + if (send_cq == recv_cq) + spin_lock_irq(&send_cq->lock); + else if (send_cq > recv_cq) { + spin_lock_irq(&send_cq->lock); + spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock_irq(&recv_cq->lock); + spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); + } +} + +static inline void c2_unlock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq) +{ + if (send_cq == recv_cq) + spin_unlock_irq(&send_cq->lock); + else if (send_cq > recv_cq) { + spin_unlock(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else { + spin_unlock(&send_cq->lock); + spin_unlock_irq(&recv_cq->lock); + } +} + +void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp) +{ + struct c2_cq *send_cq; + struct c2_cq *recv_cq; + + send_cq = to_c2cq(qp->ibqp.send_cq); + recv_cq = to_c2cq(qp->ibqp.recv_cq); + + /* + * Lock CQs here, so that CQ polling code can do QP lookup + * without taking a lock. + */ + c2_lock_cqs(send_cq, recv_cq); + c2_free_qpn(c2dev, qp->qpn); + c2_unlock_cqs(send_cq, recv_cq); + + /* + * Destroy qp in the rnic... + */ + destroy_qp(c2dev, qp); + + /* + * Mark any unreaped CQEs as null and void. + */ + c2_cq_clean(c2dev, qp, send_cq->cqn); + if (send_cq != recv_cq) + c2_cq_clean(c2dev, qp, recv_cq->cqn); + /* + * Unmap the MQs and return the shared pointers + * to the message pool. + */ + iounmap(qp->sq_mq.peer); + iounmap(qp->rq_mq.peer); + c2_free_mqsp(qp->sq_mq.shared); + c2_free_mqsp(qp->rq_mq.shared); + + atomic_dec(&qp->refcount); + wait_event(qp->wait, !atomic_read(&qp->refcount)); +} + +/* + * Function: move_sgl + * + * Description: + * Move an SGL from the user's work request struct into a CCIL Work Request + * message, swapping to WR byte order and ensure the total length doesn't + * overflow. + * + * IN: + * dst - ptr to CCIL Work Request message SGL memory. + * src - ptr to the consumers SGL memory. + * + * OUT: none + * + * Return: + * CCIL status codes. + */ +static int +move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len, + u8 * actual_count) +{ + u32 tot = 0; /* running total */ + u8 acount = 0; /* running total non-0 len sge's */ + + while (count > 0) { + /* + * If the addition of this SGE causes the + * total SGL length to exceed 2^32-1, then + * fail-n-bail. + * + * If the current total plus the next element length + * wraps, then it will go negative and be less than the + * current total... + */ + if ((tot + src->length) < tot) { + return -EINVAL; + } + /* + * Bug: 1456 (as well as 1498 & 1643) + * Skip over any sge's supplied with len=0 + */ + if (src->length) { + tot += src->length; + dst->stag = cpu_to_be32(src->lkey); + dst->to = cpu_to_be64(src->addr); + dst->length = cpu_to_be32(src->length); + dst++; + acount++; + } + src++; + count--; + } + + if (acount == 0) { + /* + * Bug: 1476 (as well as 1498, 1456 and 1643) + * Setup the SGL in the WR to make it easier for the RNIC. + * This way, the FW doesn't have to deal with special cases. + * Setting length=0 should be sufficient. + */ + dst->stag = 0; + dst->to = 0; + dst->length = 0; + } + + *p_len = tot; + *actual_count = acount; + return 0; +} + +/* + * Function: c2_activity (private function) + * + * Description: + * Post an mq index to the host->adapter activity fifo. + * + * IN: + * c2dev - ptr to c2dev structure + * mq_index - mq index to post + * shared - value most recently written to shared + * + * OUT: + * + * Return: + * none + */ +static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared) +{ + /* + * First read the register to see if the FIFO is full, and if so, + * spin until it's not. This isn't perfect -- there is no + * synchronization among the clients of the register, but in + * practice it prevents multiple CPU from hammering the bus + * with PCI RETRY. Note that when this does happen, the card + * cannot get on the bus and the card and system hang in a + * deadlock -- thus the need for this code. [TOT] + */ + while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000) + udelay(10); + + __raw_writel(C2_HINT_MAKE(mq_index, shared), + c2dev->regs + PCI_BAR0_ADAPTER_HINT); +} + +/* + * Function: qp_wr_post + * + * Description: + * This in-line function allocates a MQ msg, then moves the host-copy of + * the completed WR into msg. Then it posts the message. + * + * IN: + * q - ptr to user MQ. + * wr - ptr to host-copy of the WR. + * qp - ptr to user qp + * size - Number of bytes to post. Assumed to be divisible by 4. + * + * OUT: none + * + * Return: + * CCIL status codes. + */ +static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size) +{ + union c2wr *msg; + + msg = c2_mq_alloc(q); + if (msg == NULL) { + return -EINVAL; + } +#ifdef CCMSGMAGIC + ((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC); +#endif + + /* + * Since all header fields in the WR are the same as the + * CQE, set the following so the adapter need not. + */ + c2_wr_set_result(wr, CCERR_PENDING); + + /* + * Copy the wr down to the adapter + */ + memcpy((void *) msg, (void *) wr, size); + + c2_mq_produce(q); + return 0; +} + + +int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, + struct ib_send_wr **bad_wr) +{ + struct c2_dev *c2dev = to_c2dev(ibqp->device); + struct c2_qp *qp = to_c2qp(ibqp); + union c2wr wr; + unsigned long lock_flags; + int err = 0; + + u32 flags; + u32 tot_len; + u8 actual_sge_count; + u32 msg_size; + + if (qp->state > IB_QPS_RTS) { + err = -EINVAL; + goto out; + } + + while (ib_wr) { + + flags = 0; + wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id; + if (ib_wr->send_flags & IB_SEND_SIGNALED) { + flags |= SQ_SIGNALED; + } + + switch (ib_wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_INV: + if (ib_wr->opcode == IB_WR_SEND) { + if (ib_wr->send_flags & IB_SEND_SOLICITED) + c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE); + else + c2_wr_set_id(&wr, C2_WR_TYPE_SEND); + wr.sqwr.send.remote_stag = 0; + } else { + if (ib_wr->send_flags & IB_SEND_SOLICITED) + c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE_INV); + else + c2_wr_set_id(&wr, C2_WR_TYPE_SEND_INV); + wr.sqwr.send.remote_stag = + cpu_to_be32(ib_wr->ex.invalidate_rkey); + } + + msg_size = sizeof(struct c2wr_send_req) + + sizeof(struct c2_data_addr) * ib_wr->num_sge; + if (ib_wr->num_sge > qp->send_sgl_depth) { + err = -EINVAL; + break; + } + if (ib_wr->send_flags & IB_SEND_FENCE) { + flags |= SQ_READ_FENCE; + } + err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data), + ib_wr->sg_list, + ib_wr->num_sge, + &tot_len, &actual_sge_count); + wr.sqwr.send.sge_len = cpu_to_be32(tot_len); + c2_wr_set_sge_count(&wr, actual_sge_count); + break; + case IB_WR_RDMA_WRITE: + c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE); + msg_size = sizeof(struct c2wr_rdma_write_req) + + (sizeof(struct c2_data_addr) * ib_wr->num_sge); + if (ib_wr->num_sge > qp->rdma_write_sgl_depth) { + err = -EINVAL; + break; + } + if (ib_wr->send_flags & IB_SEND_FENCE) { + flags |= SQ_READ_FENCE; + } + wr.sqwr.rdma_write.remote_stag = + cpu_to_be32(ib_wr->wr.rdma.rkey); + wr.sqwr.rdma_write.remote_to = + cpu_to_be64(ib_wr->wr.rdma.remote_addr); + err = move_sgl((struct c2_data_addr *) + & (wr.sqwr.rdma_write.data), + ib_wr->sg_list, + ib_wr->num_sge, + &tot_len, &actual_sge_count); + wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len); + c2_wr_set_sge_count(&wr, actual_sge_count); + break; + case IB_WR_RDMA_READ: + c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ); + msg_size = sizeof(struct c2wr_rdma_read_req); + + /* IWarp only suppots 1 sge for RDMA reads */ + if (ib_wr->num_sge > 1) { + err = -EINVAL; + break; + } + + /* + * Move the local and remote stag/to/len into the WR. + */ + wr.sqwr.rdma_read.local_stag = + cpu_to_be32(ib_wr->sg_list->lkey); + wr.sqwr.rdma_read.local_to = + cpu_to_be64(ib_wr->sg_list->addr); + wr.sqwr.rdma_read.remote_stag = + cpu_to_be32(ib_wr->wr.rdma.rkey); + wr.sqwr.rdma_read.remote_to = + cpu_to_be64(ib_wr->wr.rdma.remote_addr); + wr.sqwr.rdma_read.length = + cpu_to_be32(ib_wr->sg_list->length); + break; + default: + /* error */ + msg_size = 0; + err = -EINVAL; + break; + } + + /* + * If we had an error on the last wr build, then + * break out. Possible errors include bogus WR + * type, and a bogus SGL length... + */ + if (err) { + break; + } + + /* + * Store flags + */ + c2_wr_set_flags(&wr, flags); + + /* + * Post the puppy! + */ + spin_lock_irqsave(&qp->lock, lock_flags); + err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size); + if (err) { + spin_unlock_irqrestore(&qp->lock, lock_flags); + break; + } + + /* + * Enqueue mq index to activity FIFO. + */ + c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count); + spin_unlock_irqrestore(&qp->lock, lock_flags); + + ib_wr = ib_wr->next; + } + +out: + if (err) + *bad_wr = ib_wr; + return err; +} + +int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, + struct ib_recv_wr **bad_wr) +{ + struct c2_dev *c2dev = to_c2dev(ibqp->device); + struct c2_qp *qp = to_c2qp(ibqp); + union c2wr wr; + unsigned long lock_flags; + int err = 0; + + if (qp->state > IB_QPS_RTS) { + err = -EINVAL; + goto out; + } + + /* + * Try and post each work request + */ + while (ib_wr) { + u32 tot_len; + u8 actual_sge_count; + + if (ib_wr->num_sge > qp->recv_sgl_depth) { + err = -EINVAL; + break; + } + + /* + * Create local host-copy of the WR + */ + wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id; + c2_wr_set_id(&wr, CCWR_RECV); + c2_wr_set_flags(&wr, 0); + + /* sge_count is limited to eight bits. */ + BUG_ON(ib_wr->num_sge >= 256); + err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data), + ib_wr->sg_list, + ib_wr->num_sge, &tot_len, &actual_sge_count); + c2_wr_set_sge_count(&wr, actual_sge_count); + + /* + * If we had an error on the last wr build, then + * break out. Possible errors include bogus WR + * type, and a bogus SGL length... + */ + if (err) { + break; + } + + spin_lock_irqsave(&qp->lock, lock_flags); + err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size); + if (err) { + spin_unlock_irqrestore(&qp->lock, lock_flags); + break; + } + + /* + * Enqueue mq index to activity FIFO + */ + c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count); + spin_unlock_irqrestore(&qp->lock, lock_flags); + + ib_wr = ib_wr->next; + } + +out: + if (err) + *bad_wr = ib_wr; + return err; +} + +void __devinit c2_init_qp_table(struct c2_dev *c2dev) +{ + spin_lock_init(&c2dev->qp_table.lock); + idr_init(&c2dev->qp_table.idr); +} + +void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev) +{ + idr_destroy(&c2dev->qp_table.idr); +} diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c new file mode 100644 index 00000000..8c81992f --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_rnic.c @@ -0,0 +1,654 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include "c2.h" +#include "c2_vq.h" + +/* Device capabilities */ +#define C2_MIN_PAGESIZE 1024 + +#define C2_MAX_MRS 32768 +#define C2_MAX_QPS 16000 +#define C2_MAX_WQE_SZ 256 +#define C2_MAX_QP_WR ((128*1024)/C2_MAX_WQE_SZ) +#define C2_MAX_SGES 4 +#define C2_MAX_SGE_RD 1 +#define C2_MAX_CQS 32768 +#define C2_MAX_CQES 4096 +#define C2_MAX_PDS 16384 + +/* + * Send the adapter INIT message to the amso1100 + */ +static int c2_adapter_init(struct c2_dev *c2dev) +{ + struct c2wr_init_req wr; + int err; + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_INIT); + wr.hdr.context = 0; + wr.hint_count = cpu_to_be64(c2dev->hint_count_dma); + wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma); + wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma); + wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma); + wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma); + wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma); + + /* Post the init message */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + + return err; +} + +/* + * Send the adapter TERM message to the amso1100 + */ +static void c2_adapter_term(struct c2_dev *c2dev) +{ + struct c2wr_init_req wr; + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_TERM); + wr.hdr.context = 0; + + /* Post the init message */ + vq_send_wr(c2dev, (union c2wr *) & wr); + c2dev->init = 0; + + return; +} + +/* + * Query the adapter + */ +static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props) +{ + struct c2_vq_req *vq_req; + struct c2wr_rnic_query_req wr; + struct c2wr_rnic_query_rep *reply; + int err; + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + c2_wr_set_id(&wr, CCWR_RNIC_QUERY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) &wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail1; + + reply = + (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) + err = -ENOMEM; + else + err = c2_errno(reply); + if (err) + goto bail2; + + props->fw_ver = + ((u64)be32_to_cpu(reply->fw_ver_major) << 32) | + ((be32_to_cpu(reply->fw_ver_minor) & 0xFFFF) << 16) | + (be32_to_cpu(reply->fw_ver_patch) & 0xFFFF); + memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6); + props->max_mr_size = 0xFFFFFFFF; + props->page_size_cap = ~(C2_MIN_PAGESIZE-1); + props->vendor_id = be32_to_cpu(reply->vendor_id); + props->vendor_part_id = be32_to_cpu(reply->part_number); + props->hw_ver = be32_to_cpu(reply->hw_version); + props->max_qp = be32_to_cpu(reply->max_qps); + props->max_qp_wr = be32_to_cpu(reply->max_qp_depth); + props->device_cap_flags = c2dev->device_cap_flags; + props->max_sge = C2_MAX_SGES; + props->max_sge_rd = C2_MAX_SGE_RD; + props->max_cq = be32_to_cpu(reply->max_cqs); + props->max_cqe = be32_to_cpu(reply->max_cq_depth); + props->max_mr = be32_to_cpu(reply->max_mrs); + props->max_pd = be32_to_cpu(reply->max_pds); + props->max_qp_rd_atom = be32_to_cpu(reply->max_qp_ird); + props->max_ee_rd_atom = 0; + props->max_res_rd_atom = be32_to_cpu(reply->max_global_ird); + props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord); + props->max_ee_init_rd_atom = 0; + props->atomic_cap = IB_ATOMIC_NONE; + props->max_ee = 0; + props->max_rdd = 0; + props->max_mw = be32_to_cpu(reply->max_mws); + props->max_raw_ipv6_qp = 0; + props->max_raw_ethy_qp = 0; + props->max_mcast_grp = 0; + props->max_mcast_qp_attach = 0; + props->max_total_mcast_qp_attach = 0; + props->max_ah = 0; + props->max_fmr = 0; + props->max_map_per_fmr = 0; + props->max_srq = 0; + props->max_srq_wr = 0; + props->max_srq_sge = 0; + props->max_pkeys = 0; + props->local_ca_ack_delay = 0; + + bail2: + vq_repbuf_free(c2dev, reply); + + bail1: + vq_req_free(c2dev, vq_req); + return err; +} + +/* + * Add an IP address to the RNIC interface + */ +int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask) +{ + struct c2_vq_req *vq_req; + struct c2wr_rnic_setconfig_req *wr; + struct c2wr_rnic_setconfig_rep *reply; + struct c2_netaddr netaddr; + int err, len; + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + len = sizeof(struct c2_netaddr); + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + err = -ENOMEM; + goto bail0; + } + + c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG); + wr->hdr.context = (unsigned long) vq_req; + wr->rnic_handle = c2dev->adapter_handle; + wr->option = cpu_to_be32(C2_CFG_ADD_ADDR); + + netaddr.ip_addr = inaddr; + netaddr.netmask = inmask; + netaddr.mtu = 0; + + memcpy(wr->data, &netaddr, len); + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail1; + + reply = + (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail1; + } + + err = c2_errno(reply); + vq_repbuf_free(c2dev, reply); + + bail1: + kfree(wr); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +/* + * Delete an IP address from the RNIC interface + */ +int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask) +{ + struct c2_vq_req *vq_req; + struct c2wr_rnic_setconfig_req *wr; + struct c2wr_rnic_setconfig_rep *reply; + struct c2_netaddr netaddr; + int err, len; + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + len = sizeof(struct c2_netaddr); + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + err = -ENOMEM; + goto bail0; + } + + c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG); + wr->hdr.context = (unsigned long) vq_req; + wr->rnic_handle = c2dev->adapter_handle; + wr->option = cpu_to_be32(C2_CFG_DEL_ADDR); + + netaddr.ip_addr = inaddr; + netaddr.netmask = inmask; + netaddr.mtu = 0; + + memcpy(wr->data, &netaddr, len); + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail1; + + reply = + (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail1; + } + + err = c2_errno(reply); + vq_repbuf_free(c2dev, reply); + + bail1: + kfree(wr); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +/* + * Open a single RNIC instance to use with all + * low level openib calls + */ +static int c2_rnic_open(struct c2_dev *c2dev) +{ + struct c2_vq_req *vq_req; + union c2wr wr; + struct c2wr_rnic_open_rep *reply; + int err; + + vq_req = vq_req_alloc(c2dev); + if (vq_req == NULL) { + return -ENOMEM; + } + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_RNIC_OPEN); + wr.rnic_open.req.hdr.context = (unsigned long) (vq_req); + wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE); + wr.rnic_open.req.port_num = cpu_to_be16(0); + wr.rnic_open.req.user_context = (unsigned long) c2dev; + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, &wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail0; + } + + reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + if ((err = c2_errno(reply)) != 0) { + goto bail1; + } + + c2dev->adapter_handle = reply->rnic_handle; + + bail1: + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +/* + * Close the RNIC instance + */ +static int c2_rnic_close(struct c2_dev *c2dev) +{ + struct c2_vq_req *vq_req; + union c2wr wr; + struct c2wr_rnic_close_rep *reply; + int err; + + vq_req = vq_req_alloc(c2dev); + if (vq_req == NULL) { + return -ENOMEM; + } + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_RNIC_CLOSE); + wr.rnic_close.req.hdr.context = (unsigned long) vq_req; + wr.rnic_close.req.rnic_handle = c2dev->adapter_handle; + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, &wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail0; + } + + reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + if ((err = c2_errno(reply)) != 0) { + goto bail1; + } + + c2dev->adapter_handle = 0; + + bail1: + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +/* + * Called by c2_probe to initialize the RNIC. This principally + * involves initalizing the various limits and resouce pools that + * comprise the RNIC instance. + */ +int __devinit c2_rnic_init(struct c2_dev *c2dev) +{ + int err; + u32 qsize, msgsize; + void *q1_pages; + void *q2_pages; + void __iomem *mmio_regs; + + /* Device capabilities */ + c2dev->device_cap_flags = + (IB_DEVICE_RESIZE_MAX_WR | + IB_DEVICE_CURR_QP_STATE_MOD | + IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_LOCAL_DMA_LKEY | + IB_DEVICE_MEM_WINDOW); + + /* Allocate the qptr_array */ + c2dev->qptr_array = vzalloc(C2_MAX_CQS * sizeof(void *)); + if (!c2dev->qptr_array) { + return -ENOMEM; + } + + /* Initialize the qptr_array */ + c2dev->qptr_array[0] = (void *) &c2dev->req_vq; + c2dev->qptr_array[1] = (void *) &c2dev->rep_vq; + c2dev->qptr_array[2] = (void *) &c2dev->aeq; + + /* Initialize data structures */ + init_waitqueue_head(&c2dev->req_vq_wo); + spin_lock_init(&c2dev->vqlock); + spin_lock_init(&c2dev->lock); + + /* Allocate MQ shared pointer pool for kernel clients. User + * mode client pools are hung off the user context + */ + err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool); + if (err) { + goto bail0; + } + + /* Allocate shared pointers for Q0, Q1, and Q2 from + * the shared pointer pool. + */ + + c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &c2dev->hint_count_dma, + GFP_KERNEL); + c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &c2dev->req_vq.shared_dma, + GFP_KERNEL); + c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &c2dev->rep_vq.shared_dma, + GFP_KERNEL); + c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &c2dev->aeq.shared_dma, GFP_KERNEL); + if (!c2dev->hint_count || !c2dev->req_vq.shared || + !c2dev->rep_vq.shared || !c2dev->aeq.shared) { + err = -ENOMEM; + goto bail1; + } + + mmio_regs = c2dev->kva; + /* Initialize the Verbs Request Queue */ + c2_mq_req_init(&c2dev->req_vq, 0, + be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_QSIZE)), + be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_MSGSIZE)), + mmio_regs + + be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_POOLSTART)), + mmio_regs + + be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_SHARED)), + C2_MQ_ADAPTER_TARGET); + + /* Initialize the Verbs Reply Queue */ + qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_QSIZE)); + msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_MSGSIZE)); + q1_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize, + &c2dev->rep_vq.host_dma, GFP_KERNEL); + if (!q1_pages) { + err = -ENOMEM; + goto bail1; + } + dma_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma); + pr_debug("%s rep_vq va %p dma %llx\n", __func__, q1_pages, + (unsigned long long) c2dev->rep_vq.host_dma); + c2_mq_rep_init(&c2dev->rep_vq, + 1, + qsize, + msgsize, + q1_pages, + mmio_regs + + be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_SHARED)), + C2_MQ_HOST_TARGET); + + /* Initialize the Asynchronus Event Queue */ + qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_QSIZE)); + msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_MSGSIZE)); + q2_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize, + &c2dev->aeq.host_dma, GFP_KERNEL); + if (!q2_pages) { + err = -ENOMEM; + goto bail2; + } + dma_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma); + pr_debug("%s aeq va %p dma %llx\n", __func__, q2_pages, + (unsigned long long) c2dev->aeq.host_dma); + c2_mq_rep_init(&c2dev->aeq, + 2, + qsize, + msgsize, + q2_pages, + mmio_regs + + be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_SHARED)), + C2_MQ_HOST_TARGET); + + /* Initialize the verbs request allocator */ + err = vq_init(c2dev); + if (err) + goto bail3; + + /* Enable interrupts on the adapter */ + writel(0, c2dev->regs + C2_IDIS); + + /* create the WR init message */ + err = c2_adapter_init(c2dev); + if (err) + goto bail4; + c2dev->init++; + + /* open an adapter instance */ + err = c2_rnic_open(c2dev); + if (err) + goto bail4; + + /* Initialize cached the adapter limits */ + if (c2_rnic_query(c2dev, &c2dev->props)) + goto bail5; + + /* Initialize the PD pool */ + err = c2_init_pd_table(c2dev); + if (err) + goto bail5; + + /* Initialize the QP pool */ + c2_init_qp_table(c2dev); + return 0; + + bail5: + c2_rnic_close(c2dev); + bail4: + vq_term(c2dev); + bail3: + dma_free_coherent(&c2dev->pcidev->dev, + c2dev->aeq.q_size * c2dev->aeq.msg_size, + q2_pages, dma_unmap_addr(&c2dev->aeq, mapping)); + bail2: + dma_free_coherent(&c2dev->pcidev->dev, + c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size, + q1_pages, dma_unmap_addr(&c2dev->rep_vq, mapping)); + bail1: + c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool); + bail0: + vfree(c2dev->qptr_array); + + return err; +} + +/* + * Called by c2_remove to cleanup the RNIC resources. + */ +void __devexit c2_rnic_term(struct c2_dev *c2dev) +{ + + /* Close the open adapter instance */ + c2_rnic_close(c2dev); + + /* Send the TERM message to the adapter */ + c2_adapter_term(c2dev); + + /* Disable interrupts on the adapter */ + writel(1, c2dev->regs + C2_IDIS); + + /* Free the QP pool */ + c2_cleanup_qp_table(c2dev); + + /* Free the PD pool */ + c2_cleanup_pd_table(c2dev); + + /* Free the verbs request allocator */ + vq_term(c2dev); + + /* Free the asynchronus event queue */ + dma_free_coherent(&c2dev->pcidev->dev, + c2dev->aeq.q_size * c2dev->aeq.msg_size, + c2dev->aeq.msg_pool.host, + dma_unmap_addr(&c2dev->aeq, mapping)); + + /* Free the verbs reply queue */ + dma_free_coherent(&c2dev->pcidev->dev, + c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size, + c2dev->rep_vq.msg_pool.host, + dma_unmap_addr(&c2dev->rep_vq, mapping)); + + /* Free the MQ shared pointer pool */ + c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool); + + /* Free the qptr_array */ + vfree(c2dev->qptr_array); + + return; +} diff --git a/drivers/infiniband/hw/amso1100/c2_status.h b/drivers/infiniband/hw/amso1100/c2_status.h new file mode 100644 index 00000000..6ee4aa92 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_status.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _C2_STATUS_H_ +#define _C2_STATUS_H_ + +/* + * Verbs Status Codes + */ +enum c2_status { + C2_OK = 0, /* This must be zero */ + CCERR_INSUFFICIENT_RESOURCES = 1, + CCERR_INVALID_MODIFIER = 2, + CCERR_INVALID_MODE = 3, + CCERR_IN_USE = 4, + CCERR_INVALID_RNIC = 5, + CCERR_INTERRUPTED_OPERATION = 6, + CCERR_INVALID_EH = 7, + CCERR_INVALID_CQ = 8, + CCERR_CQ_EMPTY = 9, + CCERR_NOT_IMPLEMENTED = 10, + CCERR_CQ_DEPTH_TOO_SMALL = 11, + CCERR_PD_IN_USE = 12, + CCERR_INVALID_PD = 13, + CCERR_INVALID_SRQ = 14, + CCERR_INVALID_ADDRESS = 15, + CCERR_INVALID_NETMASK = 16, + CCERR_INVALID_QP = 17, + CCERR_INVALID_QP_STATE = 18, + CCERR_TOO_MANY_WRS_POSTED = 19, + CCERR_INVALID_WR_TYPE = 20, + CCERR_INVALID_SGL_LENGTH = 21, + CCERR_INVALID_SQ_DEPTH = 22, + CCERR_INVALID_RQ_DEPTH = 23, + CCERR_INVALID_ORD = 24, + CCERR_INVALID_IRD = 25, + CCERR_QP_ATTR_CANNOT_CHANGE = 26, + CCERR_INVALID_STAG = 27, + CCERR_QP_IN_USE = 28, + CCERR_OUTSTANDING_WRS = 29, + CCERR_STAG_IN_USE = 30, + CCERR_INVALID_STAG_INDEX = 31, + CCERR_INVALID_SGL_FORMAT = 32, + CCERR_ADAPTER_TIMEOUT = 33, + CCERR_INVALID_CQ_DEPTH = 34, + CCERR_INVALID_PRIVATE_DATA_LENGTH = 35, + CCERR_INVALID_EP = 36, + CCERR_MR_IN_USE = CCERR_STAG_IN_USE, + CCERR_FLUSHED = 38, + CCERR_INVALID_WQE = 39, + CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40, + CCERR_REMOTE_TERMINATION_ERROR = 41, + CCERR_BASE_AND_BOUNDS_VIOLATION = 42, + CCERR_ACCESS_VIOLATION = 43, + CCERR_INVALID_PD_ID = 44, + CCERR_WRAP_ERROR = 45, + CCERR_INV_STAG_ACCESS_ERROR = 46, + CCERR_ZERO_RDMA_READ_RESOURCES = 47, + CCERR_QP_NOT_PRIVILEGED = 48, + CCERR_STAG_STATE_NOT_INVALID = 49, + CCERR_INVALID_PAGE_SIZE = 50, + CCERR_INVALID_BUFFER_SIZE = 51, + CCERR_INVALID_PBE = 52, + CCERR_INVALID_FBO = 53, + CCERR_INVALID_LENGTH = 54, + CCERR_INVALID_ACCESS_RIGHTS = 55, + CCERR_PBL_TOO_BIG = 56, + CCERR_INVALID_VA = 57, + CCERR_INVALID_REGION = 58, + CCERR_INVALID_WINDOW = 59, + CCERR_TOTAL_LENGTH_TOO_BIG = 60, + CCERR_INVALID_QP_ID = 61, + CCERR_ADDR_IN_USE = 62, + CCERR_ADDR_NOT_AVAIL = 63, + CCERR_NET_DOWN = 64, + CCERR_NET_UNREACHABLE = 65, + CCERR_CONN_ABORTED = 66, + CCERR_CONN_RESET = 67, + CCERR_NO_BUFS = 68, + CCERR_CONN_TIMEDOUT = 69, + CCERR_CONN_REFUSED = 70, + CCERR_HOST_UNREACHABLE = 71, + CCERR_INVALID_SEND_SGL_DEPTH = 72, + CCERR_INVALID_RECV_SGL_DEPTH = 73, + CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74, + CCERR_INSUFFICIENT_PRIVILEGES = 75, + CCERR_STACK_ERROR = 76, + CCERR_INVALID_VERSION = 77, + CCERR_INVALID_MTU = 78, + CCERR_INVALID_IMAGE = 79, + CCERR_PENDING = 98, /* not an error; user internally by adapter */ + CCERR_DEFER = 99, /* not an error; used internally by adapter */ + CCERR_FAILED_WRITE = 100, + CCERR_FAILED_ERASE = 101, + CCERR_FAILED_VERIFICATION = 102, + CCERR_NOT_FOUND = 103, + +}; + +/* + * CCAE_ACTIVE_CONNECT_RESULTS status result codes. + */ +enum c2_connect_status { + C2_CONN_STATUS_SUCCESS = C2_OK, + C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES, + C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT, + C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED, + C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE, + C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE, + C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC, + C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP, + C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE, + C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET, + C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL, +}; + +/* + * Flash programming status codes. + */ +enum c2_flash_status { + C2_FLASH_STATUS_SUCCESS = 0x0000, + C2_FLASH_STATUS_VERIFY_ERR = 0x0002, + C2_FLASH_STATUS_IMAGE_ERR = 0x0004, + C2_FLASH_STATUS_ECLBS = 0x0400, + C2_FLASH_STATUS_PSLBS = 0x0800, + C2_FLASH_STATUS_VPENS = 0x1000, +}; + +#endif /* _C2_STATUS_H_ */ diff --git a/drivers/infiniband/hw/amso1100/c2_user.h b/drivers/infiniband/hw/amso1100/c2_user.h new file mode 100644 index 00000000..7e9e7ad6 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_user.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef C2_USER_H +#define C2_USER_H + +#include + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct c2_alloc_ucontext_resp { + __u32 qp_tab_size; + __u32 uarc_size; +}; + +struct c2_alloc_pd_resp { + __u32 pdn; + __u32 reserved; +}; + +struct c2_create_cq { + __u32 lkey; + __u32 pdn; + __u64 arm_db_page; + __u64 set_db_page; + __u32 arm_db_index; + __u32 set_db_index; +}; + +struct c2_create_cq_resp { + __u32 cqn; + __u32 reserved; +}; + +struct c2_create_qp { + __u32 lkey; + __u32 reserved; + __u64 sq_db_page; + __u64 rq_db_page; + __u32 sq_db_index; + __u32 rq_db_index; +}; + +#endif /* C2_USER_H */ diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c new file mode 100644 index 00000000..2ec716fb --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_vq.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include + +#include "c2_vq.h" +#include "c2_provider.h" + +/* + * Verbs Request Objects: + * + * VQ Request Objects are allocated by the kernel verbs handlers. + * They contain a wait object, a refcnt, an atomic bool indicating that the + * adapter has replied, and a copy of the verb reply work request. + * A pointer to the VQ Request Object is passed down in the context + * field of the work request message, and reflected back by the adapter + * in the verbs reply message. The function handle_vq() in the interrupt + * path will use this pointer to: + * 1) append a copy of the verbs reply message + * 2) mark that the reply is ready + * 3) wake up the kernel verbs handler blocked awaiting the reply. + * + * + * The kernel verbs handlers do a "get" to put a 2nd reference on the + * VQ Request object. If the kernel verbs handler exits before the adapter + * can respond, this extra reference will keep the VQ Request object around + * until the adapter's reply can be processed. The reason we need this is + * because a pointer to this object is stuffed into the context field of + * the verbs work request message, and reflected back in the reply message. + * It is used in the interrupt handler (handle_vq()) to wake up the appropriate + * kernel verb handler that is blocked awaiting the verb reply. + * So handle_vq() will do a "put" on the object when it's done accessing it. + * NOTE: If we guarantee that the kernel verb handler will never bail before + * getting the reply, then we don't need these refcnts. + * + * + * VQ Request objects are freed by the kernel verbs handlers only + * after the verb has been processed, or when the adapter fails and + * does not reply. + * + * + * Verbs Reply Buffers: + * + * VQ Reply bufs are local host memory copies of a + * outstanding Verb Request reply + * message. The are always allocated by the kernel verbs handlers, and _may_ be + * freed by either the kernel verbs handler -or- the interrupt handler. The + * kernel verbs handler _must_ free the repbuf, then free the vq request object + * in that order. + */ + +int vq_init(struct c2_dev *c2dev) +{ + sprintf(c2dev->vq_cache_name, "c2-vq:dev%c", + (char) ('0' + c2dev->devnum)); + c2dev->host_msg_cache = + kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (c2dev->host_msg_cache == NULL) { + return -ENOMEM; + } + return 0; +} + +void vq_term(struct c2_dev *c2dev) +{ + kmem_cache_destroy(c2dev->host_msg_cache); +} + +/* vq_req_alloc - allocate a VQ Request Object and initialize it. + * The refcnt is set to 1. + */ +struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev) +{ + struct c2_vq_req *r; + + r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL); + if (r) { + init_waitqueue_head(&r->wait_object); + r->reply_msg = 0; + r->event = 0; + r->cm_id = NULL; + r->qp = NULL; + atomic_set(&r->refcnt, 1); + atomic_set(&r->reply_ready, 0); + } + return r; +} + + +/* vq_req_free - free the VQ Request Object. It is assumed the verbs handler + * has already free the VQ Reply Buffer if it existed. + */ +void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r) +{ + r->reply_msg = 0; + if (atomic_dec_and_test(&r->refcnt)) { + kfree(r); + } +} + +/* vq_req_get - reference a VQ Request Object. Done + * only in the kernel verbs handlers. + */ +void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r) +{ + atomic_inc(&r->refcnt); +} + + +/* vq_req_put - dereference and potentially free a VQ Request Object. + * + * This is only called by handle_vq() on the + * interrupt when it is done processing + * a verb reply message. If the associated + * kernel verbs handler has already bailed, + * then this put will actually free the VQ + * Request object _and_ the VQ Reply Buffer + * if it exists. + */ +void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r) +{ + if (atomic_dec_and_test(&r->refcnt)) { + if (r->reply_msg != 0) + vq_repbuf_free(c2dev, + (void *) (unsigned long) r->reply_msg); + kfree(r); + } +} + + +/* + * vq_repbuf_alloc - allocate a VQ Reply Buffer. + */ +void *vq_repbuf_alloc(struct c2_dev *c2dev) +{ + return kmem_cache_alloc(c2dev->host_msg_cache, GFP_ATOMIC); +} + +/* + * vq_send_wr - post a verbs request message to the Verbs Request Queue. + * If a message is not available in the MQ, then block until one is available. + * NOTE: handle_mq() on the interrupt context will wake up threads blocked here. + * When the adapter drains the Verbs Request Queue, + * it inserts MQ index 0 in to the + * adapter->host activity fifo and interrupts the host. + */ +int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr) +{ + void *msg; + wait_queue_t __wait; + + /* + * grab adapter vq lock + */ + spin_lock(&c2dev->vqlock); + + /* + * allocate msg + */ + msg = c2_mq_alloc(&c2dev->req_vq); + + /* + * If we cannot get a msg, then we'll wait + * When a messages are available, the int handler will wake_up() + * any waiters. + */ + while (msg == NULL) { + pr_debug("%s:%d no available msg in VQ, waiting...\n", + __func__, __LINE__); + init_waitqueue_entry(&__wait, current); + add_wait_queue(&c2dev->req_vq_wo, &__wait); + spin_unlock(&c2dev->vqlock); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (!c2_mq_full(&c2dev->req_vq)) { + break; + } + if (!signal_pending(current)) { + schedule_timeout(1 * HZ); /* 1 second... */ + continue; + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&c2dev->req_vq_wo, &__wait); + return -EINTR; + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&c2dev->req_vq_wo, &__wait); + spin_lock(&c2dev->vqlock); + msg = c2_mq_alloc(&c2dev->req_vq); + } + + /* + * copy wr into adapter msg + */ + memcpy(msg, wr, c2dev->req_vq.msg_size); + + /* + * post msg + */ + c2_mq_produce(&c2dev->req_vq); + + /* + * release adapter vq lock + */ + spin_unlock(&c2dev->vqlock); + return 0; +} + + +/* + * vq_wait_for_reply - block until the adapter posts a Verb Reply Message. + */ +int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req) +{ + if (!wait_event_timeout(req->wait_object, + atomic_read(&req->reply_ready), + 60*HZ)) + return -ETIMEDOUT; + + return 0; +} + +/* + * vq_repbuf_free - Free a Verbs Reply Buffer. + */ +void vq_repbuf_free(struct c2_dev *c2dev, void *reply) +{ + kmem_cache_free(c2dev->host_msg_cache, reply); +} diff --git a/drivers/infiniband/hw/amso1100/c2_vq.h b/drivers/infiniband/hw/amso1100/c2_vq.h new file mode 100644 index 00000000..33805627 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_vq.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _C2_VQ_H_ +#define _C2_VQ_H_ +#include +#include "c2.h" +#include "c2_wr.h" +#include "c2_provider.h" + +struct c2_vq_req { + u64 reply_msg; /* ptr to reply msg */ + wait_queue_head_t wait_object; /* wait object for vq reqs */ + atomic_t reply_ready; /* set when reply is ready */ + atomic_t refcnt; /* used to cancel WRs... */ + int event; + struct iw_cm_id *cm_id; + struct c2_qp *qp; +}; + +extern int vq_init(struct c2_dev *c2dev); +extern void vq_term(struct c2_dev *c2dev); + +extern struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev); +extern void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req); +extern void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req); +extern void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req); +extern int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr); + +extern void *vq_repbuf_alloc(struct c2_dev *c2dev); +extern void vq_repbuf_free(struct c2_dev *c2dev, void *reply); + +extern int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req); +#endif /* _C2_VQ_H_ */ diff --git a/drivers/infiniband/hw/amso1100/c2_wr.h b/drivers/infiniband/hw/amso1100/c2_wr.h new file mode 100644 index 00000000..8d4b4ca4 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_wr.h @@ -0,0 +1,1520 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _C2_WR_H_ +#define _C2_WR_H_ + +#ifdef CCDEBUG +#define CCWR_MAGIC 0xb07700b0 +#endif + +#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF + +/* Maximum allowed size in bytes of private_data exchange + * on connect. + */ +#define C2_MAX_PRIVATE_DATA_SIZE 200 + +/* + * These types are shared among the adapter, host, and CCIL consumer. + */ +enum c2_cq_notification_type { + C2_CQ_NOTIFICATION_TYPE_NONE = 1, + C2_CQ_NOTIFICATION_TYPE_NEXT, + C2_CQ_NOTIFICATION_TYPE_NEXT_SE +}; + +enum c2_setconfig_cmd { + C2_CFG_ADD_ADDR = 1, + C2_CFG_DEL_ADDR = 2, + C2_CFG_ADD_ROUTE = 3, + C2_CFG_DEL_ROUTE = 4 +}; + +enum c2_getconfig_cmd { + C2_GETCONFIG_ROUTES = 1, + C2_GETCONFIG_ADDRS +}; + +/* + * CCIL Work Request Identifiers + */ +enum c2wr_ids { + CCWR_RNIC_OPEN = 1, + CCWR_RNIC_QUERY, + CCWR_RNIC_SETCONFIG, + CCWR_RNIC_GETCONFIG, + CCWR_RNIC_CLOSE, + CCWR_CQ_CREATE, + CCWR_CQ_QUERY, + CCWR_CQ_MODIFY, + CCWR_CQ_DESTROY, + CCWR_QP_CONNECT, + CCWR_PD_ALLOC, + CCWR_PD_DEALLOC, + CCWR_SRQ_CREATE, + CCWR_SRQ_QUERY, + CCWR_SRQ_MODIFY, + CCWR_SRQ_DESTROY, + CCWR_QP_CREATE, + CCWR_QP_QUERY, + CCWR_QP_MODIFY, + CCWR_QP_DESTROY, + CCWR_NSMR_STAG_ALLOC, + CCWR_NSMR_REGISTER, + CCWR_NSMR_PBL, + CCWR_STAG_DEALLOC, + CCWR_NSMR_REREGISTER, + CCWR_SMR_REGISTER, + CCWR_MR_QUERY, + CCWR_MW_ALLOC, + CCWR_MW_QUERY, + CCWR_EP_CREATE, + CCWR_EP_GETOPT, + CCWR_EP_SETOPT, + CCWR_EP_DESTROY, + CCWR_EP_BIND, + CCWR_EP_CONNECT, + CCWR_EP_LISTEN, + CCWR_EP_SHUTDOWN, + CCWR_EP_LISTEN_CREATE, + CCWR_EP_LISTEN_DESTROY, + CCWR_EP_QUERY, + CCWR_CR_ACCEPT, + CCWR_CR_REJECT, + CCWR_CONSOLE, + CCWR_TERM, + CCWR_FLASH_INIT, + CCWR_FLASH, + CCWR_BUF_ALLOC, + CCWR_BUF_FREE, + CCWR_FLASH_WRITE, + CCWR_INIT, /* WARNING: Don't move this ever again! */ + + + + /* Add new IDs here */ + + + + /* + * WARNING: CCWR_LAST must always be the last verbs id defined! + * All the preceding IDs are fixed, and must not change. + * You can add new IDs, but must not remove or reorder + * any IDs. If you do, YOU will ruin any hope of + * compatibility between versions. + */ + CCWR_LAST, + + /* + * Start over at 1 so that arrays indexed by user wr id's + * begin at 1. This is OK since the verbs and user wr id's + * are always used on disjoint sets of queues. + */ + /* + * The order of the CCWR_SEND_XX verbs must + * match the order of the RDMA_OPs + */ + CCWR_SEND = 1, + CCWR_SEND_INV, + CCWR_SEND_SE, + CCWR_SEND_SE_INV, + CCWR_RDMA_WRITE, + CCWR_RDMA_READ, + CCWR_RDMA_READ_INV, + CCWR_MW_BIND, + CCWR_NSMR_FASTREG, + CCWR_STAG_INVALIDATE, + CCWR_RECV, + CCWR_NOP, + CCWR_UNIMPL, +/* WARNING: This must always be the last user wr id defined! */ +}; +#define RDMA_SEND_OPCODE_FROM_WR_ID(x) (x+2) + +/* + * SQ/RQ Work Request Types + */ +enum c2_wr_type { + C2_WR_TYPE_SEND = CCWR_SEND, + C2_WR_TYPE_SEND_SE = CCWR_SEND_SE, + C2_WR_TYPE_SEND_INV = CCWR_SEND_INV, + C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV, + C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE, + C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ, + C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV, + C2_WR_TYPE_BIND_MW = CCWR_MW_BIND, + C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG, + C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE, + C2_WR_TYPE_RECV = CCWR_RECV, + C2_WR_TYPE_NOP = CCWR_NOP, +}; + +struct c2_netaddr { + __be32 ip_addr; + __be32 netmask; + u32 mtu; +}; + +struct c2_route { + u32 ip_addr; /* 0 indicates the default route */ + u32 netmask; /* netmask associated with dst */ + u32 flags; + union { + u32 ipaddr; /* address of the nexthop interface */ + u8 enaddr[6]; + } nexthop; +}; + +/* + * A Scatter Gather Entry. + */ +struct c2_data_addr { + __be32 stag; + __be32 length; + __be64 to; +}; + +/* + * MR and MW flags used by the consumer, RI, and RNIC. + */ +enum c2_mm_flags { + MEM_REMOTE = 0x0001, /* allow mw binds with remote access. */ + MEM_VA_BASED = 0x0002, /* Not Zero-based */ + MEM_PBL_COMPLETE = 0x0004, /* PBL array is complete in this msg */ + MEM_LOCAL_READ = 0x0008, /* allow local reads */ + MEM_LOCAL_WRITE = 0x0010, /* allow local writes */ + MEM_REMOTE_READ = 0x0020, /* allow remote reads */ + MEM_REMOTE_WRITE = 0x0040, /* allow remote writes */ + MEM_WINDOW_BIND = 0x0080, /* binds allowed */ + MEM_SHARED = 0x0100, /* set if MR is shared */ + MEM_STAG_VALID = 0x0200 /* set if STAG is in valid state */ +}; + +/* + * CCIL API ACF flags defined in terms of the low level mem flags. + * This minimizes translation needed in the user API + */ +enum c2_acf { + C2_ACF_LOCAL_READ = MEM_LOCAL_READ, + C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE, + C2_ACF_REMOTE_READ = MEM_REMOTE_READ, + C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE, + C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND +}; + +/* + * Image types of objects written to flash + */ +#define C2_FLASH_IMG_BITFILE 1 +#define C2_FLASH_IMG_OPTION_ROM 2 +#define C2_FLASH_IMG_VPD 3 + +/* + * to fix bug 1815 we define the max size allowable of the + * terminate message (per the IETF spec).Refer to the IETF + * protocol specification, section 12.1.6, page 64) + * The message is prefixed by 20 types of DDP info. + * + * Then the message has 6 bytes for the terminate control + * and DDP segment length info plus a DDP header (either + * 14 or 18 byts) plus 28 bytes for the RDMA header. + * Thus the max size in: + * 20 + (6 + 18 + 28) = 72 + */ +#define C2_MAX_TERMINATE_MESSAGE_SIZE (72) + +/* + * Build String Length. It must be the same as C2_BUILD_STR_LEN in ccil_api.h + */ +#define WR_BUILD_STR_LEN 64 + +/* + * WARNING: All of these structs need to align any 64bit types on + * 64 bit boundaries! 64bit types include u64 and u64. + */ + +/* + * Clustercore Work Request Header. Be sensitive to field layout + * and alignment. + */ +struct c2wr_hdr { + /* wqe_count is part of the cqe. It is put here so the + * adapter can write to it while the wr is pending without + * clobbering part of the wr. This word need not be dma'd + * from the host to adapter by libccil, but we copy it anyway + * to make the memcpy to the adapter better aligned. + */ + __be32 wqe_count; + + /* Put these fields next so that later 32- and 64-bit + * quantities are naturally aligned. + */ + u8 id; + u8 result; /* adapter -> host */ + u8 sge_count; /* host -> adapter */ + u8 flags; /* host -> adapter */ + + u64 context; +#ifdef CCMSGMAGIC + u32 magic; + u32 pad; +#endif +} __attribute__((packed)); + +/* + *------------------------ RNIC ------------------------ + */ + +/* + * WR_RNIC_OPEN + */ + +/* + * Flags for the RNIC WRs + */ +enum c2_rnic_flags { + RNIC_IRD_STATIC = 0x0001, + RNIC_ORD_STATIC = 0x0002, + RNIC_QP_STATIC = 0x0004, + RNIC_SRQ_SUPPORTED = 0x0008, + RNIC_PBL_BLOCK_MODE = 0x0010, + RNIC_SRQ_MODEL_ARRIVAL = 0x0020, + RNIC_CQ_OVF_DETECTED = 0x0040, + RNIC_PRIV_MODE = 0x0080 +}; + +struct c2wr_rnic_open_req { + struct c2wr_hdr hdr; + u64 user_context; + __be16 flags; /* See enum c2_rnic_flags */ + __be16 port_num; +} __attribute__((packed)); + +struct c2wr_rnic_open_rep { + struct c2wr_hdr hdr; + u32 rnic_handle; +} __attribute__((packed)); + +union c2wr_rnic_open { + struct c2wr_rnic_open_req req; + struct c2wr_rnic_open_rep rep; +} __attribute__((packed)); + +struct c2wr_rnic_query_req { + struct c2wr_hdr hdr; + u32 rnic_handle; +} __attribute__((packed)); + +/* + * WR_RNIC_QUERY + */ +struct c2wr_rnic_query_rep { + struct c2wr_hdr hdr; + u64 user_context; + __be32 vendor_id; + __be32 part_number; + __be32 hw_version; + __be32 fw_ver_major; + __be32 fw_ver_minor; + __be32 fw_ver_patch; + char fw_ver_build_str[WR_BUILD_STR_LEN]; + __be32 max_qps; + __be32 max_qp_depth; + u32 max_srq_depth; + u32 max_send_sgl_depth; + u32 max_rdma_sgl_depth; + __be32 max_cqs; + __be32 max_cq_depth; + u32 max_cq_event_handlers; + __be32 max_mrs; + u32 max_pbl_depth; + __be32 max_pds; + __be32 max_global_ird; + u32 max_global_ord; + __be32 max_qp_ird; + __be32 max_qp_ord; + u32 flags; + __be32 max_mws; + u32 pbe_range_low; + u32 pbe_range_high; + u32 max_srqs; + u32 page_size; +} __attribute__((packed)); + +union c2wr_rnic_query { + struct c2wr_rnic_query_req req; + struct c2wr_rnic_query_rep rep; +} __attribute__((packed)); + +/* + * WR_RNIC_GETCONFIG + */ + +struct c2wr_rnic_getconfig_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 option; /* see c2_getconfig_cmd_t */ + u64 reply_buf; + u32 reply_buf_len; +} __attribute__((packed)) ; + +struct c2wr_rnic_getconfig_rep { + struct c2wr_hdr hdr; + u32 option; /* see c2_getconfig_cmd_t */ + u32 count_len; /* length of the number of addresses configured */ +} __attribute__((packed)) ; + +union c2wr_rnic_getconfig { + struct c2wr_rnic_getconfig_req req; + struct c2wr_rnic_getconfig_rep rep; +} __attribute__((packed)) ; + +/* + * WR_RNIC_SETCONFIG + */ +struct c2wr_rnic_setconfig_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + __be32 option; /* See c2_setconfig_cmd_t */ + /* variable data and pad. See c2_netaddr and c2_route */ + u8 data[0]; +} __attribute__((packed)) ; + +struct c2wr_rnic_setconfig_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_rnic_setconfig { + struct c2wr_rnic_setconfig_req req; + struct c2wr_rnic_setconfig_rep rep; +} __attribute__((packed)) ; + +/* + * WR_RNIC_CLOSE + */ +struct c2wr_rnic_close_req { + struct c2wr_hdr hdr; + u32 rnic_handle; +} __attribute__((packed)) ; + +struct c2wr_rnic_close_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_rnic_close { + struct c2wr_rnic_close_req req; + struct c2wr_rnic_close_rep rep; +} __attribute__((packed)) ; + +/* + *------------------------ CQ ------------------------ + */ +struct c2wr_cq_create_req { + struct c2wr_hdr hdr; + __be64 shared_ht; + u64 user_context; + __be64 msg_pool; + u32 rnic_handle; + __be32 msg_size; + __be32 depth; +} __attribute__((packed)) ; + +struct c2wr_cq_create_rep { + struct c2wr_hdr hdr; + __be32 mq_index; + __be32 adapter_shared; + u32 cq_handle; +} __attribute__((packed)) ; + +union c2wr_cq_create { + struct c2wr_cq_create_req req; + struct c2wr_cq_create_rep rep; +} __attribute__((packed)) ; + +struct c2wr_cq_modify_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 cq_handle; + u32 new_depth; + u64 new_msg_pool; +} __attribute__((packed)) ; + +struct c2wr_cq_modify_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_cq_modify { + struct c2wr_cq_modify_req req; + struct c2wr_cq_modify_rep rep; +} __attribute__((packed)) ; + +struct c2wr_cq_destroy_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 cq_handle; +} __attribute__((packed)) ; + +struct c2wr_cq_destroy_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_cq_destroy { + struct c2wr_cq_destroy_req req; + struct c2wr_cq_destroy_rep rep; +} __attribute__((packed)) ; + +/* + *------------------------ PD ------------------------ + */ +struct c2wr_pd_alloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_pd_alloc_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_pd_alloc { + struct c2wr_pd_alloc_req req; + struct c2wr_pd_alloc_rep rep; +} __attribute__((packed)) ; + +struct c2wr_pd_dealloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_pd_dealloc_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_pd_dealloc { + struct c2wr_pd_dealloc_req req; + struct c2wr_pd_dealloc_rep rep; +} __attribute__((packed)) ; + +/* + *------------------------ SRQ ------------------------ + */ +struct c2wr_srq_create_req { + struct c2wr_hdr hdr; + u64 shared_ht; + u64 user_context; + u32 rnic_handle; + u32 srq_depth; + u32 srq_limit; + u32 sgl_depth; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_srq_create_rep { + struct c2wr_hdr hdr; + u32 srq_depth; + u32 sgl_depth; + u32 msg_size; + u32 mq_index; + u32 mq_start; + u32 srq_handle; +} __attribute__((packed)) ; + +union c2wr_srq_create { + struct c2wr_srq_create_req req; + struct c2wr_srq_create_rep rep; +} __attribute__((packed)) ; + +struct c2wr_srq_destroy_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 srq_handle; +} __attribute__((packed)) ; + +struct c2wr_srq_destroy_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_srq_destroy { + struct c2wr_srq_destroy_req req; + struct c2wr_srq_destroy_rep rep; +} __attribute__((packed)) ; + +/* + *------------------------ QP ------------------------ + */ +enum c2wr_qp_flags { + QP_RDMA_READ = 0x00000001, /* RDMA read enabled? */ + QP_RDMA_WRITE = 0x00000002, /* RDMA write enabled? */ + QP_MW_BIND = 0x00000004, /* MWs enabled */ + QP_ZERO_STAG = 0x00000008, /* enabled? */ + QP_REMOTE_TERMINATION = 0x00000010, /* remote end terminated */ + QP_RDMA_READ_RESPONSE = 0x00000020 /* Remote RDMA read */ + /* enabled? */ +}; + +struct c2wr_qp_create_req { + struct c2wr_hdr hdr; + __be64 shared_sq_ht; + __be64 shared_rq_ht; + u64 user_context; + u32 rnic_handle; + u32 sq_cq_handle; + u32 rq_cq_handle; + __be32 sq_depth; + __be32 rq_depth; + u32 srq_handle; + u32 srq_limit; + __be32 flags; /* see enum c2wr_qp_flags */ + __be32 send_sgl_depth; + __be32 recv_sgl_depth; + __be32 rdma_write_sgl_depth; + __be32 ord; + __be32 ird; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_qp_create_rep { + struct c2wr_hdr hdr; + __be32 sq_depth; + __be32 rq_depth; + u32 send_sgl_depth; + u32 recv_sgl_depth; + u32 rdma_write_sgl_depth; + u32 ord; + u32 ird; + __be32 sq_msg_size; + __be32 sq_mq_index; + __be32 sq_mq_start; + __be32 rq_msg_size; + __be32 rq_mq_index; + __be32 rq_mq_start; + u32 qp_handle; +} __attribute__((packed)) ; + +union c2wr_qp_create { + struct c2wr_qp_create_req req; + struct c2wr_qp_create_rep rep; +} __attribute__((packed)) ; + +struct c2wr_qp_query_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 qp_handle; +} __attribute__((packed)) ; + +struct c2wr_qp_query_rep { + struct c2wr_hdr hdr; + u64 user_context; + u32 rnic_handle; + u32 sq_depth; + u32 rq_depth; + u32 send_sgl_depth; + u32 rdma_write_sgl_depth; + u32 recv_sgl_depth; + u32 ord; + u32 ird; + u16 qp_state; + u16 flags; /* see c2wr_qp_flags_t */ + u32 qp_id; + u32 local_addr; + u32 remote_addr; + u16 local_port; + u16 remote_port; + u32 terminate_msg_length; /* 0 if not present */ + u8 data[0]; + /* Terminate Message in-line here. */ +} __attribute__((packed)) ; + +union c2wr_qp_query { + struct c2wr_qp_query_req req; + struct c2wr_qp_query_rep rep; +} __attribute__((packed)) ; + +struct c2wr_qp_modify_req { + struct c2wr_hdr hdr; + u64 stream_msg; + u32 stream_msg_length; + u32 rnic_handle; + u32 qp_handle; + __be32 next_qp_state; + __be32 ord; + __be32 ird; + __be32 sq_depth; + __be32 rq_depth; + u32 llp_ep_handle; +} __attribute__((packed)) ; + +struct c2wr_qp_modify_rep { + struct c2wr_hdr hdr; + u32 ord; + u32 ird; + u32 sq_depth; + u32 rq_depth; + u32 sq_msg_size; + u32 sq_mq_index; + u32 sq_mq_start; + u32 rq_msg_size; + u32 rq_mq_index; + u32 rq_mq_start; +} __attribute__((packed)) ; + +union c2wr_qp_modify { + struct c2wr_qp_modify_req req; + struct c2wr_qp_modify_rep rep; +} __attribute__((packed)) ; + +struct c2wr_qp_destroy_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 qp_handle; +} __attribute__((packed)) ; + +struct c2wr_qp_destroy_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_qp_destroy { + struct c2wr_qp_destroy_req req; + struct c2wr_qp_destroy_rep rep; +} __attribute__((packed)) ; + +/* + * The CCWR_QP_CONNECT msg is posted on the verbs request queue. It can + * only be posted when a QP is in IDLE state. After the connect request is + * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state. + * No synchronous reply from adapter to this WR. The results of + * connection are passed back in an async event CCAE_ACTIVE_CONNECT_RESULTS + * See c2wr_ae_active_connect_results_t + */ +struct c2wr_qp_connect_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 qp_handle; + __be32 remote_addr; + __be16 remote_port; + u16 pad; + __be32 private_data_length; + u8 private_data[0]; /* Private data in-line. */ +} __attribute__((packed)) ; + +struct c2wr_qp_connect { + struct c2wr_qp_connect_req req; + /* no synchronous reply. */ +} __attribute__((packed)) ; + + +/* + *------------------------ MM ------------------------ + */ + +struct c2wr_nsmr_stag_alloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 pbl_depth; + u32 pd_id; + u32 flags; +} __attribute__((packed)) ; + +struct c2wr_nsmr_stag_alloc_rep { + struct c2wr_hdr hdr; + u32 pbl_depth; + u32 stag_index; +} __attribute__((packed)) ; + +union c2wr_nsmr_stag_alloc { + struct c2wr_nsmr_stag_alloc_req req; + struct c2wr_nsmr_stag_alloc_rep rep; +} __attribute__((packed)) ; + +struct c2wr_nsmr_register_req { + struct c2wr_hdr hdr; + __be64 va; + u32 rnic_handle; + __be16 flags; + u8 stag_key; + u8 pad; + u32 pd_id; + __be32 pbl_depth; + __be32 pbe_size; + __be32 fbo; + __be32 length; + __be32 addrs_length; + /* array of paddrs (must be aligned on a 64bit boundary) */ + __be64 paddrs[0]; +} __attribute__((packed)) ; + +struct c2wr_nsmr_register_rep { + struct c2wr_hdr hdr; + u32 pbl_depth; + __be32 stag_index; +} __attribute__((packed)) ; + +union c2wr_nsmr_register { + struct c2wr_nsmr_register_req req; + struct c2wr_nsmr_register_rep rep; +} __attribute__((packed)) ; + +struct c2wr_nsmr_pbl_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + __be32 flags; + __be32 stag_index; + __be32 addrs_length; + /* array of paddrs (must be aligned on a 64bit boundary) */ + __be64 paddrs[0]; +} __attribute__((packed)) ; + +struct c2wr_nsmr_pbl_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_nsmr_pbl { + struct c2wr_nsmr_pbl_req req; + struct c2wr_nsmr_pbl_rep rep; +} __attribute__((packed)) ; + +struct c2wr_mr_query_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 stag_index; +} __attribute__((packed)) ; + +struct c2wr_mr_query_rep { + struct c2wr_hdr hdr; + u8 stag_key; + u8 pad[3]; + u32 pd_id; + u32 flags; + u32 pbl_depth; +} __attribute__((packed)) ; + +union c2wr_mr_query { + struct c2wr_mr_query_req req; + struct c2wr_mr_query_rep rep; +} __attribute__((packed)) ; + +struct c2wr_mw_query_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 stag_index; +} __attribute__((packed)) ; + +struct c2wr_mw_query_rep { + struct c2wr_hdr hdr; + u8 stag_key; + u8 pad[3]; + u32 pd_id; + u32 flags; +} __attribute__((packed)) ; + +union c2wr_mw_query { + struct c2wr_mw_query_req req; + struct c2wr_mw_query_rep rep; +} __attribute__((packed)) ; + + +struct c2wr_stag_dealloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + __be32 stag_index; +} __attribute__((packed)) ; + +struct c2wr_stag_dealloc_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_stag_dealloc { + struct c2wr_stag_dealloc_req req; + struct c2wr_stag_dealloc_rep rep; +} __attribute__((packed)) ; + +struct c2wr_nsmr_reregister_req { + struct c2wr_hdr hdr; + u64 va; + u32 rnic_handle; + u16 flags; + u8 stag_key; + u8 pad; + u32 stag_index; + u32 pd_id; + u32 pbl_depth; + u32 pbe_size; + u32 fbo; + u32 length; + u32 addrs_length; + u32 pad1; + /* array of paddrs (must be aligned on a 64bit boundary) */ + u64 paddrs[0]; +} __attribute__((packed)) ; + +struct c2wr_nsmr_reregister_rep { + struct c2wr_hdr hdr; + u32 pbl_depth; + u32 stag_index; +} __attribute__((packed)) ; + +union c2wr_nsmr_reregister { + struct c2wr_nsmr_reregister_req req; + struct c2wr_nsmr_reregister_rep rep; +} __attribute__((packed)) ; + +struct c2wr_smr_register_req { + struct c2wr_hdr hdr; + u64 va; + u32 rnic_handle; + u16 flags; + u8 stag_key; + u8 pad; + u32 stag_index; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_smr_register_rep { + struct c2wr_hdr hdr; + u32 stag_index; +} __attribute__((packed)) ; + +union c2wr_smr_register { + struct c2wr_smr_register_req req; + struct c2wr_smr_register_rep rep; +} __attribute__((packed)) ; + +struct c2wr_mw_alloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_mw_alloc_rep { + struct c2wr_hdr hdr; + u32 stag_index; +} __attribute__((packed)) ; + +union c2wr_mw_alloc { + struct c2wr_mw_alloc_req req; + struct c2wr_mw_alloc_rep rep; +} __attribute__((packed)) ; + +/* + *------------------------ WRs ----------------------- + */ + +struct c2wr_user_hdr { + struct c2wr_hdr hdr; /* Has status and WR Type */ +} __attribute__((packed)) ; + +enum c2_qp_state { + C2_QP_STATE_IDLE = 0x01, + C2_QP_STATE_CONNECTING = 0x02, + C2_QP_STATE_RTS = 0x04, + C2_QP_STATE_CLOSING = 0x08, + C2_QP_STATE_TERMINATE = 0x10, + C2_QP_STATE_ERROR = 0x20, +}; + +/* Completion queue entry. */ +struct c2wr_ce { + struct c2wr_hdr hdr; /* Has status and WR Type */ + u64 qp_user_context; /* c2_user_qp_t * */ + u32 qp_state; /* Current QP State */ + u32 handle; /* QPID or EP Handle */ + __be32 bytes_rcvd; /* valid for RECV WCs */ + u32 stag; +} __attribute__((packed)) ; + + +/* + * Flags used for all post-sq WRs. These must fit in the flags + * field of the struct c2wr_hdr (eight bits). + */ +enum { + SQ_SIGNALED = 0x01, + SQ_READ_FENCE = 0x02, + SQ_FENCE = 0x04, +}; + +/* + * Common fields for all post-sq WRs. Namely the standard header and a + * secondary header with fields common to all post-sq WRs. + */ +struct c2_sq_hdr { + struct c2wr_user_hdr user_hdr; +} __attribute__((packed)); + +/* + * Same as above but for post-rq WRs. + */ +struct c2_rq_hdr { + struct c2wr_user_hdr user_hdr; +} __attribute__((packed)); + +/* + * use the same struct for all sends. + */ +struct c2wr_send_req { + struct c2_sq_hdr sq_hdr; + __be32 sge_len; + __be32 remote_stag; + u8 data[0]; /* SGE array */ +} __attribute__((packed)); + +union c2wr_send { + struct c2wr_send_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_rdma_write_req { + struct c2_sq_hdr sq_hdr; + __be64 remote_to; + __be32 remote_stag; + __be32 sge_len; + u8 data[0]; /* SGE array */ +} __attribute__((packed)); + +union c2wr_rdma_write { + struct c2wr_rdma_write_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_rdma_read_req { + struct c2_sq_hdr sq_hdr; + __be64 local_to; + __be64 remote_to; + __be32 local_stag; + __be32 remote_stag; + __be32 length; +} __attribute__((packed)); + +union c2wr_rdma_read { + struct c2wr_rdma_read_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_mw_bind_req { + struct c2_sq_hdr sq_hdr; + u64 va; + u8 stag_key; + u8 pad[3]; + u32 mw_stag_index; + u32 mr_stag_index; + u32 length; + u32 flags; +} __attribute__((packed)); + +union c2wr_mw_bind { + struct c2wr_mw_bind_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_nsmr_fastreg_req { + struct c2_sq_hdr sq_hdr; + u64 va; + u8 stag_key; + u8 pad[3]; + u32 stag_index; + u32 pbe_size; + u32 fbo; + u32 length; + u32 addrs_length; + /* array of paddrs (must be aligned on a 64bit boundary) */ + u64 paddrs[0]; +} __attribute__((packed)); + +union c2wr_nsmr_fastreg { + struct c2wr_nsmr_fastreg_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_stag_invalidate_req { + struct c2_sq_hdr sq_hdr; + u8 stag_key; + u8 pad[3]; + u32 stag_index; +} __attribute__((packed)); + +union c2wr_stag_invalidate { + struct c2wr_stag_invalidate_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +union c2wr_sqwr { + struct c2_sq_hdr sq_hdr; + struct c2wr_send_req send; + struct c2wr_send_req send_se; + struct c2wr_send_req send_inv; + struct c2wr_send_req send_se_inv; + struct c2wr_rdma_write_req rdma_write; + struct c2wr_rdma_read_req rdma_read; + struct c2wr_mw_bind_req mw_bind; + struct c2wr_nsmr_fastreg_req nsmr_fastreg; + struct c2wr_stag_invalidate_req stag_inv; +} __attribute__((packed)); + + +/* + * RQ WRs + */ +struct c2wr_rqwr { + struct c2_rq_hdr rq_hdr; + u8 data[0]; /* array of SGEs */ +} __attribute__((packed)); + +union c2wr_recv { + struct c2wr_rqwr req; + struct c2wr_ce rep; +} __attribute__((packed)); + +/* + * All AEs start with this header. Most AEs only need to convey the + * information in the header. Some, like LLP connection events, need + * more info. The union typdef c2wr_ae_t has all the possible AEs. + * + * hdr.context is the user_context from the rnic_open WR. NULL If this + * is not affiliated with an rnic + * + * hdr.id is the AE identifier (eg; CCAE_REMOTE_SHUTDOWN, + * CCAE_LLP_CLOSE_COMPLETE) + * + * resource_type is one of: C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ + * + * user_context is the context passed down when the host created the resource. + */ +struct c2wr_ae_hdr { + struct c2wr_hdr hdr; + u64 user_context; /* user context for this res. */ + __be32 resource_type; /* see enum c2_resource_indicator */ + __be32 resource; /* handle for resource */ + __be32 qp_state; /* current QP State */ +} __attribute__((packed)); + +/* + * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ, + * the adapter moves the QP into RTS state + */ +struct c2wr_ae_active_connect_results { + struct c2wr_ae_hdr ae_hdr; + __be32 laddr; + __be32 raddr; + __be16 lport; + __be16 rport; + __be32 private_data_length; + u8 private_data[0]; /* data is in-line in the msg. */ +} __attribute__((packed)); + +/* + * When connections are established by the stack (and the private data + * MPA frame is received), the adapter will generate an event to the host. + * The details of the connection, any private data, and the new connection + * request handle is passed up via the CCAE_CONNECTION_REQUEST msg on the + * AE queue: + */ +struct c2wr_ae_connection_request { + struct c2wr_ae_hdr ae_hdr; + u32 cr_handle; /* connreq handle (sock ptr) */ + __be32 laddr; + __be32 raddr; + __be16 lport; + __be16 rport; + __be32 private_data_length; + u8 private_data[0]; /* data is in-line in the msg. */ +} __attribute__((packed)); + +union c2wr_ae { + struct c2wr_ae_hdr ae_generic; + struct c2wr_ae_active_connect_results ae_active_connect_results; + struct c2wr_ae_connection_request ae_connection_request; +} __attribute__((packed)); + +struct c2wr_init_req { + struct c2wr_hdr hdr; + __be64 hint_count; + __be64 q0_host_shared; + __be64 q1_host_shared; + __be64 q1_host_msg_pool; + __be64 q2_host_shared; + __be64 q2_host_msg_pool; +} __attribute__((packed)); + +struct c2wr_init_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)); + +union c2wr_init { + struct c2wr_init_req req; + struct c2wr_init_rep rep; +} __attribute__((packed)); + +/* + * For upgrading flash. + */ + +struct c2wr_flash_init_req { + struct c2wr_hdr hdr; + u32 rnic_handle; +} __attribute__((packed)); + +struct c2wr_flash_init_rep { + struct c2wr_hdr hdr; + u32 adapter_flash_buf_offset; + u32 adapter_flash_len; +} __attribute__((packed)); + +union c2wr_flash_init { + struct c2wr_flash_init_req req; + struct c2wr_flash_init_rep rep; +} __attribute__((packed)); + +struct c2wr_flash_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 len; +} __attribute__((packed)); + +struct c2wr_flash_rep { + struct c2wr_hdr hdr; + u32 status; +} __attribute__((packed)); + +union c2wr_flash { + struct c2wr_flash_req req; + struct c2wr_flash_rep rep; +} __attribute__((packed)); + +struct c2wr_buf_alloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 size; +} __attribute__((packed)); + +struct c2wr_buf_alloc_rep { + struct c2wr_hdr hdr; + u32 offset; /* 0 if mem not available */ + u32 size; /* 0 if mem not available */ +} __attribute__((packed)); + +union c2wr_buf_alloc { + struct c2wr_buf_alloc_req req; + struct c2wr_buf_alloc_rep rep; +} __attribute__((packed)); + +struct c2wr_buf_free_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 offset; /* Must match value from alloc */ + u32 size; /* Must match value from alloc */ +} __attribute__((packed)); + +struct c2wr_buf_free_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)); + +union c2wr_buf_free { + struct c2wr_buf_free_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_flash_write_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 offset; + u32 size; + u32 type; + u32 flags; +} __attribute__((packed)); + +struct c2wr_flash_write_rep { + struct c2wr_hdr hdr; + u32 status; +} __attribute__((packed)); + +union c2wr_flash_write { + struct c2wr_flash_write_req req; + struct c2wr_flash_write_rep rep; +} __attribute__((packed)); + +/* + * Messages for LLP connection setup. + */ + +/* + * Listen Request. This allocates a listening endpoint to allow passive + * connection setup. Newly established LLP connections are passed up + * via an AE. See c2wr_ae_connection_request_t + */ +struct c2wr_ep_listen_create_req { + struct c2wr_hdr hdr; + u64 user_context; /* returned in AEs. */ + u32 rnic_handle; + __be32 local_addr; /* local addr, or 0 */ + __be16 local_port; /* 0 means "pick one" */ + u16 pad; + __be32 backlog; /* tradional tcp listen bl */ +} __attribute__((packed)); + +struct c2wr_ep_listen_create_rep { + struct c2wr_hdr hdr; + u32 ep_handle; /* handle to new listening ep */ + u16 local_port; /* resulting port... */ + u16 pad; +} __attribute__((packed)); + +union c2wr_ep_listen_create { + struct c2wr_ep_listen_create_req req; + struct c2wr_ep_listen_create_rep rep; +} __attribute__((packed)); + +struct c2wr_ep_listen_destroy_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 ep_handle; +} __attribute__((packed)); + +struct c2wr_ep_listen_destroy_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)); + +union c2wr_ep_listen_destroy { + struct c2wr_ep_listen_destroy_req req; + struct c2wr_ep_listen_destroy_rep rep; +} __attribute__((packed)); + +struct c2wr_ep_query_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 ep_handle; +} __attribute__((packed)); + +struct c2wr_ep_query_rep { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 local_addr; + u32 remote_addr; + u16 local_port; + u16 remote_port; +} __attribute__((packed)); + +union c2wr_ep_query { + struct c2wr_ep_query_req req; + struct c2wr_ep_query_rep rep; +} __attribute__((packed)); + + +/* + * The host passes this down to indicate acceptance of a pending iWARP + * connection. The cr_handle was obtained from the CONNECTION_REQUEST + * AE passed up by the adapter. See c2wr_ae_connection_request_t. + */ +struct c2wr_cr_accept_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 qp_handle; /* QP to bind to this LLP conn */ + u32 ep_handle; /* LLP handle to accept */ + __be32 private_data_length; + u8 private_data[0]; /* data in-line in msg. */ +} __attribute__((packed)); + +/* + * adapter sends reply when private data is successfully submitted to + * the LLP. + */ +struct c2wr_cr_accept_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)); + +union c2wr_cr_accept { + struct c2wr_cr_accept_req req; + struct c2wr_cr_accept_rep rep; +} __attribute__((packed)); + +/* + * The host sends this down if a given iWARP connection request was + * rejected by the consumer. The cr_handle was obtained from a + * previous c2wr_ae_connection_request_t AE sent by the adapter. + */ +struct c2wr_cr_reject_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 ep_handle; /* LLP handle to reject */ +} __attribute__((packed)); + +/* + * Dunno if this is needed, but we'll add it for now. The adapter will + * send the reject_reply after the LLP endpoint has been destroyed. + */ +struct c2wr_cr_reject_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)); + +union c2wr_cr_reject { + struct c2wr_cr_reject_req req; + struct c2wr_cr_reject_rep rep; +} __attribute__((packed)); + +/* + * console command. Used to implement a debug console over the verbs + * request and reply queues. + */ + +/* + * Console request message. It contains: + * - message hdr with id = CCWR_CONSOLE + * - the physaddr/len of host memory to be used for the reply. + * - the command string. eg: "netstat -s" or "zoneinfo" + */ +struct c2wr_console_req { + struct c2wr_hdr hdr; /* id = CCWR_CONSOLE */ + u64 reply_buf; /* pinned host buf for reply */ + u32 reply_buf_len; /* length of reply buffer */ + u8 command[0]; /* NUL terminated ascii string */ + /* containing the command req */ +} __attribute__((packed)); + +/* + * flags used in the console reply. + */ +enum c2_console_flags { + CONS_REPLY_TRUNCATED = 0x00000001 /* reply was truncated */ +} __attribute__((packed)); + +/* + * Console reply message. + * hdr.result contains the c2_status_t error if the reply was _not_ generated, + * or C2_OK if the reply was generated. + */ +struct c2wr_console_rep { + struct c2wr_hdr hdr; /* id = CCWR_CONSOLE */ + u32 flags; +} __attribute__((packed)); + +union c2wr_console { + struct c2wr_console_req req; + struct c2wr_console_rep rep; +} __attribute__((packed)); + + +/* + * Giant union with all WRs. Makes life easier... + */ +union c2wr { + struct c2wr_hdr hdr; + struct c2wr_user_hdr user_hdr; + union c2wr_rnic_open rnic_open; + union c2wr_rnic_query rnic_query; + union c2wr_rnic_getconfig rnic_getconfig; + union c2wr_rnic_setconfig rnic_setconfig; + union c2wr_rnic_close rnic_close; + union c2wr_cq_create cq_create; + union c2wr_cq_modify cq_modify; + union c2wr_cq_destroy cq_destroy; + union c2wr_pd_alloc pd_alloc; + union c2wr_pd_dealloc pd_dealloc; + union c2wr_srq_create srq_create; + union c2wr_srq_destroy srq_destroy; + union c2wr_qp_create qp_create; + union c2wr_qp_query qp_query; + union c2wr_qp_modify qp_modify; + union c2wr_qp_destroy qp_destroy; + struct c2wr_qp_connect qp_connect; + union c2wr_nsmr_stag_alloc nsmr_stag_alloc; + union c2wr_nsmr_register nsmr_register; + union c2wr_nsmr_pbl nsmr_pbl; + union c2wr_mr_query mr_query; + union c2wr_mw_query mw_query; + union c2wr_stag_dealloc stag_dealloc; + union c2wr_sqwr sqwr; + struct c2wr_rqwr rqwr; + struct c2wr_ce ce; + union c2wr_ae ae; + union c2wr_init init; + union c2wr_ep_listen_create ep_listen_create; + union c2wr_ep_listen_destroy ep_listen_destroy; + union c2wr_cr_accept cr_accept; + union c2wr_cr_reject cr_reject; + union c2wr_console console; + union c2wr_flash_init flash_init; + union c2wr_flash flash; + union c2wr_buf_alloc buf_alloc; + union c2wr_buf_free buf_free; + union c2wr_flash_write flash_write; +} __attribute__((packed)); + + +/* + * Accessors for the wr fields that are packed together tightly to + * reduce the wr message size. The wr arguments are void* so that + * either a struct c2wr*, a struct c2wr_hdr*, or a pointer to any of the types + * in the struct c2wr union can be passed in. + */ +static __inline__ u8 c2_wr_get_id(void *wr) +{ + return ((struct c2wr_hdr *) wr)->id; +} +static __inline__ void c2_wr_set_id(void *wr, u8 id) +{ + ((struct c2wr_hdr *) wr)->id = id; +} +static __inline__ u8 c2_wr_get_result(void *wr) +{ + return ((struct c2wr_hdr *) wr)->result; +} +static __inline__ void c2_wr_set_result(void *wr, u8 result) +{ + ((struct c2wr_hdr *) wr)->result = result; +} +static __inline__ u8 c2_wr_get_flags(void *wr) +{ + return ((struct c2wr_hdr *) wr)->flags; +} +static __inline__ void c2_wr_set_flags(void *wr, u8 flags) +{ + ((struct c2wr_hdr *) wr)->flags = flags; +} +static __inline__ u8 c2_wr_get_sge_count(void *wr) +{ + return ((struct c2wr_hdr *) wr)->sge_count; +} +static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count) +{ + ((struct c2wr_hdr *) wr)->sge_count = sge_count; +} +static __inline__ __be32 c2_wr_get_wqe_count(void *wr) +{ + return ((struct c2wr_hdr *) wr)->wqe_count; +} +static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count) +{ + ((struct c2wr_hdr *) wr)->wqe_count = wqe_count; +} + +#endif /* _C2_WR_H_ */ diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig new file mode 100644 index 00000000..2b6352b8 --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/Kconfig @@ -0,0 +1,27 @@ +config INFINIBAND_CXGB3 + tristate "Chelsio RDMA Driver" + depends on CHELSIO_T3 && INET + select GENERIC_ALLOCATOR + ---help--- + This is an iWARP/RDMA driver for the Chelsio T3 1GbE and + 10GbE adapters. + + For general information about Chelsio and our products, visit + our website at . + + For customer support, please visit our customer support page at + . + + Please send feedback to . + + To compile this driver as a module, choose M here: the module + will be called iw_cxgb3. + +config INFINIBAND_CXGB3_DEBUG + bool "Verbose debugging output" + depends on INFINIBAND_CXGB3 + default n + ---help--- + This option causes the Chelsio RDMA driver to produce copious + amounts of debug messages. Select this if you are developing + the driver or trying to diagnose a problem. diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile new file mode 100644 index 00000000..27613641 --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/Makefile @@ -0,0 +1,8 @@ +ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb3 + +obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o + +iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ + iwch_provider.o iwch.o cxio_hal.o cxio_resource.o + +ccflags-$(CONFIG_INFINIBAND_CXGB3_DEBUG) += -DDEBUG diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c new file mode 100644 index 00000000..8bca6b4e --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifdef DEBUG +#include +#include +#include "common.h" +#include "cxgb3_ioctl.h" +#include "cxio_hal.h" +#include "cxio_wr.h" + +void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag) +{ + struct ch_mem_range *m; + u64 *data; + int rc; + int size = 32; + + m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); + if (!m) { + PDBG("%s couldn't allocate memory.\n", __func__); + return; + } + m->mem_id = MEM_PMRX; + m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base; + m->len = size; + PDBG("%s TPT addr 0x%x len %d\n", __func__, m->addr, m->len); + rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); + if (rc) { + PDBG("%s toectl returned error %d\n", __func__, rc); + kfree(m); + return; + } + + data = (u64 *)m->buf; + while (size > 0) { + PDBG("TPT %08x: %016llx\n", m->addr, (unsigned long long) *data); + size -= 8; + data++; + m->addr += 8; + } + kfree(m); +} + +void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift) +{ + struct ch_mem_range *m; + u64 *data; + int rc; + int size, npages; + + shift += 12; + npages = (len + (1ULL << shift) - 1) >> shift; + size = npages * sizeof(u64); + + m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); + if (!m) { + PDBG("%s couldn't allocate memory.\n", __func__); + return; + } + m->mem_id = MEM_PMRX; + m->addr = pbl_addr; + m->len = size; + PDBG("%s PBL addr 0x%x len %d depth %d\n", + __func__, m->addr, m->len, npages); + rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); + if (rc) { + PDBG("%s toectl returned error %d\n", __func__, rc); + kfree(m); + return; + } + + data = (u64 *)m->buf; + while (size > 0) { + PDBG("PBL %08x: %016llx\n", m->addr, (unsigned long long) *data); + size -= 8; + data++; + m->addr += 8; + } + kfree(m); +} + +void cxio_dump_wqe(union t3_wr *wqe) +{ + __be64 *data = (__be64 *)wqe; + uint size = (uint)(be64_to_cpu(*data) & 0xff); + + if (size == 0) + size = 8; + while (size > 0) { + PDBG("WQE %p: %016llx\n", data, + (unsigned long long) be64_to_cpu(*data)); + size--; + data++; + } +} + +void cxio_dump_wce(struct t3_cqe *wce) +{ + __be64 *data = (__be64 *)wce; + int size = sizeof(*wce); + + while (size > 0) { + PDBG("WCE %p: %016llx\n", data, + (unsigned long long) be64_to_cpu(*data)); + size -= 8; + data++; + } +} + +void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents) +{ + struct ch_mem_range *m; + int size = nents * 64; + u64 *data; + int rc; + + m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); + if (!m) { + PDBG("%s couldn't allocate memory.\n", __func__); + return; + } + m->mem_id = MEM_PMRX; + m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base; + m->len = size; + PDBG("%s RQT addr 0x%x len %d\n", __func__, m->addr, m->len); + rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); + if (rc) { + PDBG("%s toectl returned error %d\n", __func__, rc); + kfree(m); + return; + } + + data = (u64 *)m->buf; + while (size > 0) { + PDBG("RQT %08x: %016llx\n", m->addr, (unsigned long long) *data); + size -= 8; + data++; + m->addr += 8; + } + kfree(m); +} + +void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid) +{ + struct ch_mem_range *m; + int size = TCB_SIZE; + u32 *data; + int rc; + + m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); + if (!m) { + PDBG("%s couldn't allocate memory.\n", __func__); + return; + } + m->mem_id = MEM_CM; + m->addr = hwtid * size; + m->len = size; + PDBG("%s TCB %d len %d\n", __func__, m->addr, m->len); + rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); + if (rc) { + PDBG("%s toectl returned error %d\n", __func__, rc); + kfree(m); + return; + } + + data = (u32 *)m->buf; + while (size > 0) { + printk("%2u: %08x %08x %08x %08x %08x %08x %08x %08x\n", + m->addr, + *(data+2), *(data+3), *(data),*(data+1), + *(data+6), *(data+7), *(data+4), *(data+5)); + size -= 32; + data += 8; + m->addr += 32; + } + kfree(m); +} +#endif diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c new file mode 100644 index 00000000..c3f5aca4 --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -0,0 +1,1345 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxio_resource.h" +#include "cxio_hal.h" +#include "cxgb3_offload.h" +#include "sge_defs.h" + +static LIST_HEAD(rdev_list); +static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL; + +static struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name) +{ + struct cxio_rdev *rdev; + + list_for_each_entry(rdev, &rdev_list, entry) + if (!strcmp(rdev->dev_name, dev_name)) + return rdev; + return NULL; +} + +static struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev) +{ + struct cxio_rdev *rdev; + + list_for_each_entry(rdev, &rdev_list, entry) + if (rdev->t3cdev_p == tdev) + return rdev; + return NULL; +} + +int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq, + enum t3_cq_opcode op, u32 credit) +{ + int ret; + struct t3_cqe *cqe; + u32 rptr; + + struct rdma_cq_op setup; + setup.id = cq->cqid; + setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0; + setup.op = op; + ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup); + + if ((ret < 0) || (op == CQ_CREDIT_UPDATE)) + return ret; + + /* + * If the rearm returned an index other than our current index, + * then there might be CQE's in flight (being DMA'd). We must wait + * here for them to complete or the consumer can miss a notification. + */ + if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) { + int i=0; + + rptr = cq->rptr; + + /* + * Keep the generation correct by bumping rptr until it + * matches the index returned by the rearm - 1. + */ + while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret) + rptr++; + + /* + * Now rptr is the index for the (last) cqe that was + * in-flight at the time the HW rearmed the CQ. We + * spin until that CQE is valid. + */ + cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2); + while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) { + udelay(1); + if (i++ > 1000000) { + printk(KERN_ERR "%s: stalled rnic\n", + rdev_p->dev_name); + return -EIO; + } + } + + return 1; + } + + return 0; +} + +static int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid) +{ + struct rdma_cq_setup setup; + setup.id = cqid; + setup.base_addr = 0; /* NULL address */ + setup.size = 0; /* disaable the CQ */ + setup.credits = 0; + setup.credit_thres = 0; + setup.ovfl_mode = 0; + return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); +} + +static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) +{ + u64 sge_cmd; + struct t3_modify_qp_wr *wqe; + struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); + if (!skb) { + PDBG("%s alloc_skb failed\n", __func__); + return -ENOMEM; + } + wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); + memset(wqe, 0, sizeof(*wqe)); + build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, + T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7, + T3_SOPEOP); + wqe->flags = cpu_to_be32(MODQP_WRITE_EC); + sge_cmd = qpid << 8 | 3; + wqe->sge_cmd = cpu_to_be64(sge_cmd); + skb->priority = CPL_PRIORITY_CONTROL; + return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); +} + +int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel) +{ + struct rdma_cq_setup setup; + int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe); + + size += 1; /* one extra page for storing cq-in-err state */ + cq->cqid = cxio_hal_get_cqid(rdev_p->rscp); + if (!cq->cqid) + return -ENOMEM; + if (kernel) { + cq->sw_queue = kzalloc(size, GFP_KERNEL); + if (!cq->sw_queue) + return -ENOMEM; + } + cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size, + &(cq->dma_addr), GFP_KERNEL); + if (!cq->queue) { + kfree(cq->sw_queue); + return -ENOMEM; + } + dma_unmap_addr_set(cq, mapping, cq->dma_addr); + memset(cq->queue, 0, size); + setup.id = cq->cqid; + setup.base_addr = (u64) (cq->dma_addr); + setup.size = 1UL << cq->size_log2; + setup.credits = 65535; + setup.credit_thres = 1; + if (rdev_p->t3cdev_p->type != T3A) + setup.ovfl_mode = 0; + else + setup.ovfl_mode = 1; + return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); +} + +#ifdef notyet +int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) +{ + struct rdma_cq_setup setup; + setup.id = cq->cqid; + setup.base_addr = (u64) (cq->dma_addr); + setup.size = 1UL << cq->size_log2; + setup.credits = setup.size; + setup.credit_thres = setup.size; /* TBD: overflow recovery */ + setup.ovfl_mode = 1; + return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); +} +#endif + +static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) +{ + struct cxio_qpid_list *entry; + u32 qpid; + int i; + + mutex_lock(&uctx->lock); + if (!list_empty(&uctx->qpids)) { + entry = list_entry(uctx->qpids.next, struct cxio_qpid_list, + entry); + list_del(&entry->entry); + qpid = entry->qpid; + kfree(entry); + } else { + qpid = cxio_hal_get_qpid(rdev_p->rscp); + if (!qpid) + goto out; + for (i = qpid+1; i & rdev_p->qpmask; i++) { + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + break; + entry->qpid = i; + list_add_tail(&entry->entry, &uctx->qpids); + } + } +out: + mutex_unlock(&uctx->lock); + PDBG("%s qpid 0x%x\n", __func__, qpid); + return qpid; +} + +static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid, + struct cxio_ucontext *uctx) +{ + struct cxio_qpid_list *entry; + + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + return; + PDBG("%s qpid 0x%x\n", __func__, qpid); + entry->qpid = qpid; + mutex_lock(&uctx->lock); + list_add_tail(&entry->entry, &uctx->qpids); + mutex_unlock(&uctx->lock); +} + +void cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) +{ + struct list_head *pos, *nxt; + struct cxio_qpid_list *entry; + + mutex_lock(&uctx->lock); + list_for_each_safe(pos, nxt, &uctx->qpids) { + entry = list_entry(pos, struct cxio_qpid_list, entry); + list_del_init(&entry->entry); + if (!(entry->qpid & rdev_p->qpmask)) + cxio_hal_put_qpid(rdev_p->rscp, entry->qpid); + kfree(entry); + } + mutex_unlock(&uctx->lock); +} + +void cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) +{ + INIT_LIST_HEAD(&uctx->qpids); + mutex_init(&uctx->lock); +} + +int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, + struct t3_wq *wq, struct cxio_ucontext *uctx) +{ + int depth = 1UL << wq->size_log2; + int rqsize = 1UL << wq->rq_size_log2; + + wq->qpid = get_qpid(rdev_p, uctx); + if (!wq->qpid) + return -ENOMEM; + + wq->rq = kzalloc(depth * sizeof(struct t3_swrq), GFP_KERNEL); + if (!wq->rq) + goto err1; + + wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize); + if (!wq->rq_addr) + goto err2; + + wq->sq = kzalloc(depth * sizeof(struct t3_swsq), GFP_KERNEL); + if (!wq->sq) + goto err3; + + wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), + depth * sizeof(union t3_wr), + &(wq->dma_addr), GFP_KERNEL); + if (!wq->queue) + goto err4; + + memset(wq->queue, 0, depth * sizeof(union t3_wr)); + dma_unmap_addr_set(wq, mapping, wq->dma_addr); + wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; + if (!kernel_domain) + wq->udb = (u64)rdev_p->rnic_info.udbell_physbase + + (wq->qpid << rdev_p->qpshift); + wq->rdev = rdev_p; + PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__, + wq->qpid, wq->doorbell, (unsigned long long) wq->udb); + return 0; +err4: + kfree(wq->sq); +err3: + cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize); +err2: + kfree(wq->rq); +err1: + put_qpid(rdev_p, wq->qpid, uctx); + return -ENOMEM; +} + +int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) +{ + int err; + err = cxio_hal_clear_cq_ctx(rdev_p, cq->cqid); + kfree(cq->sw_queue); + dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), + (1UL << (cq->size_log2)) + * sizeof(struct t3_cqe), cq->queue, + dma_unmap_addr(cq, mapping)); + cxio_hal_put_cqid(rdev_p->rscp, cq->cqid); + return err; +} + +int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq, + struct cxio_ucontext *uctx) +{ + dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), + (1UL << (wq->size_log2)) + * sizeof(union t3_wr), wq->queue, + dma_unmap_addr(wq, mapping)); + kfree(wq->sq); + cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2)); + kfree(wq->rq); + put_qpid(rdev_p, wq->qpid, uctx); + return 0; +} + +static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq) +{ + struct t3_cqe cqe; + + PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__, + wq, cq, cq->sw_rptr, cq->sw_wptr); + memset(&cqe, 0, sizeof(cqe)); + cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | + V_CQE_OPCODE(T3_SEND) | + V_CQE_TYPE(0) | + V_CQE_SWCQE(1) | + V_CQE_QPID(wq->qpid) | + V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, + cq->size_log2))); + *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; + cq->sw_wptr++; +} + +int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) +{ + u32 ptr; + int flushed = 0; + + PDBG("%s wq %p cq %p\n", __func__, wq, cq); + + /* flush RQ */ + PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__, + wq->rq_rptr, wq->rq_wptr, count); + ptr = wq->rq_rptr + count; + while (ptr++ != wq->rq_wptr) { + insert_recv_cqe(wq, cq); + flushed++; + } + return flushed; +} + +static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, + struct t3_swsq *sqp) +{ + struct t3_cqe cqe; + + PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__, + wq, cq, cq->sw_rptr, cq->sw_wptr); + memset(&cqe, 0, sizeof(cqe)); + cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | + V_CQE_OPCODE(sqp->opcode) | + V_CQE_TYPE(1) | + V_CQE_SWCQE(1) | + V_CQE_QPID(wq->qpid) | + V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, + cq->size_log2))); + cqe.u.scqe.wrid_hi = sqp->sq_wptr; + + *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; + cq->sw_wptr++; +} + +int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) +{ + __u32 ptr; + int flushed = 0; + struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2); + + ptr = wq->sq_rptr + count; + sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); + while (ptr != wq->sq_wptr) { + sqp->signaled = 0; + insert_sq_cqe(wq, cq, sqp); + ptr++; + sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); + flushed++; + } + return flushed; +} + +/* + * Move all CQEs from the HWCQ into the SWCQ. + */ +void cxio_flush_hw_cq(struct t3_cq *cq) +{ + struct t3_cqe *cqe, *swcqe; + + PDBG("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid); + cqe = cxio_next_hw_cqe(cq); + while (cqe) { + PDBG("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n", + __func__, cq->rptr, cq->sw_wptr); + swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2); + *swcqe = *cqe; + swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); + cq->sw_wptr++; + cq->rptr++; + cqe = cxio_next_hw_cqe(cq); + } +} + +static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq) +{ + if (CQE_OPCODE(*cqe) == T3_TERMINATE) + return 0; + + if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe)) + return 0; + + if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe)) + return 0; + + if (CQE_SEND_OPCODE(*cqe) && RQ_TYPE(*cqe) && + Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) + return 0; + + return 1; +} + +void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count) +{ + struct t3_cqe *cqe; + u32 ptr; + + *count = 0; + ptr = cq->sw_rptr; + while (!Q_EMPTY(ptr, cq->sw_wptr)) { + cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); + if ((SQ_TYPE(*cqe) || + ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) && + (CQE_QPID(*cqe) == wq->qpid)) + (*count)++; + ptr++; + } + PDBG("%s cq %p count %d\n", __func__, cq, *count); +} + +void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count) +{ + struct t3_cqe *cqe; + u32 ptr; + + *count = 0; + PDBG("%s count zero %d\n", __func__, *count); + ptr = cq->sw_rptr; + while (!Q_EMPTY(ptr, cq->sw_wptr)) { + cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); + if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) && + (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq)) + (*count)++; + ptr++; + } + PDBG("%s cq %p count %d\n", __func__, cq, *count); +} + +static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p) +{ + struct rdma_cq_setup setup; + setup.id = 0; + setup.base_addr = 0; /* NULL address */ + setup.size = 1; /* enable the CQ */ + setup.credits = 0; + + /* force SGE to redirect to RspQ and interrupt */ + setup.credit_thres = 0; + setup.ovfl_mode = 1; + return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); +} + +static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) +{ + int err; + u64 sge_cmd, ctx0, ctx1; + u64 base_addr; + struct t3_modify_qp_wr *wqe; + struct sk_buff *skb; + + skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); + if (!skb) { + PDBG("%s alloc_skb failed\n", __func__); + return -ENOMEM; + } + err = cxio_hal_init_ctrl_cq(rdev_p); + if (err) { + PDBG("%s err %d initializing ctrl_cq\n", __func__, err); + goto err; + } + rdev_p->ctrl_qp.workq = dma_alloc_coherent( + &(rdev_p->rnic_info.pdev->dev), + (1 << T3_CTRL_QP_SIZE_LOG2) * + sizeof(union t3_wr), + &(rdev_p->ctrl_qp.dma_addr), + GFP_KERNEL); + if (!rdev_p->ctrl_qp.workq) { + PDBG("%s dma_alloc_coherent failed\n", __func__); + err = -ENOMEM; + goto err; + } + dma_unmap_addr_set(&rdev_p->ctrl_qp, mapping, + rdev_p->ctrl_qp.dma_addr); + rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; + memset(rdev_p->ctrl_qp.workq, 0, + (1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr)); + + mutex_init(&rdev_p->ctrl_qp.lock); + init_waitqueue_head(&rdev_p->ctrl_qp.waitq); + + /* update HW Ctrl QP context */ + base_addr = rdev_p->ctrl_qp.dma_addr; + base_addr >>= 12; + ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) | + V_EC_BASE_LO((u32) base_addr & 0xffff)); + ctx0 <<= 32; + ctx0 |= V_EC_CREDITS(FW_WR_NUM); + base_addr >>= 16; + ctx1 = (u32) base_addr; + base_addr >>= 32; + ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) | + V_EC_TYPE(0) | V_EC_GEN(1) | + V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32; + wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); + memset(wqe, 0, sizeof(*wqe)); + build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0, + T3_CTL_QP_TID, 7, T3_SOPEOP); + wqe->flags = cpu_to_be32(MODQP_WRITE_EC); + sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; + wqe->sge_cmd = cpu_to_be64(sge_cmd); + wqe->ctx1 = cpu_to_be64(ctx1); + wqe->ctx0 = cpu_to_be64(ctx0); + PDBG("CtrlQP dma_addr 0x%llx workq %p size %d\n", + (unsigned long long) rdev_p->ctrl_qp.dma_addr, + rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2); + skb->priority = CPL_PRIORITY_CONTROL; + return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); +err: + kfree_skb(skb); + return err; +} + +static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p) +{ + dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), + (1UL << T3_CTRL_QP_SIZE_LOG2) + * sizeof(union t3_wr), rdev_p->ctrl_qp.workq, + dma_unmap_addr(&rdev_p->ctrl_qp, mapping)); + return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID); +} + +/* write len bytes of data into addr (32B aligned address) + * If data is NULL, clear len byte of memory to zero. + * caller acquires the ctrl_qp lock before the call + */ +static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, + u32 len, void *data) +{ + u32 i, nr_wqe, copy_len; + u8 *copy_data; + u8 wr_len, utx_len; /* length in 8 byte flit */ + enum t3_wr_flags flag; + __be64 *wqe; + u64 utx_cmd; + addr &= 0x7FFFFFF; + nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */ + PDBG("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n", + __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len, + nr_wqe, data, addr); + utx_len = 3; /* in 32B unit */ + for (i = 0; i < nr_wqe; i++) { + if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr, + T3_CTRL_QP_SIZE_LOG2)) { + PDBG("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, " + "wait for more space i %d\n", __func__, + rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i); + if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, + !Q_FULL(rdev_p->ctrl_qp.rptr, + rdev_p->ctrl_qp.wptr, + T3_CTRL_QP_SIZE_LOG2))) { + PDBG("%s ctrl_qp workq interrupted\n", + __func__); + return -ERESTARTSYS; + } + PDBG("%s ctrl_qp wakeup, continue posting work request " + "i %d\n", __func__, i); + } + wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % + (1 << T3_CTRL_QP_SIZE_LOG2))); + flag = 0; + if (i == (nr_wqe - 1)) { + /* last WQE */ + flag = T3_COMPLETION_FLAG; + if (len % 32) + utx_len = len / 32 + 1; + else + utx_len = len / 32; + } + + /* + * Force a CQE to return the credit to the workq in case + * we posted more than half the max QP size of WRs + */ + if ((i != 0) && + (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) { + flag = T3_COMPLETION_FLAG; + PDBG("%s force completion at i %d\n", __func__, i); + } + + /* build the utx mem command */ + wqe += (sizeof(struct t3_bypass_wr) >> 3); + utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3); + utx_cmd <<= 32; + utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1); + *wqe = cpu_to_be64(utx_cmd); + wqe++; + copy_data = (u8 *) data + i * 96; + copy_len = len > 96 ? 96 : len; + + /* clear memory content if data is NULL */ + if (data) + memcpy(wqe, copy_data, copy_len); + else + memset(wqe, 0, copy_len); + if (copy_len % 32) + memset(((u8 *) wqe) + copy_len, 0, + 32 - (copy_len % 32)); + wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 + + (utx_len << 2); + wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % + (1 << T3_CTRL_QP_SIZE_LOG2))); + + /* wptr in the WRID[31:0] */ + ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr; + + /* + * This must be the last write with a memory barrier + * for the genbit + */ + build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag, + Q_GENBIT(rdev_p->ctrl_qp.wptr, + T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID, + wr_len, T3_SOPEOP); + if (flag == T3_COMPLETION_FLAG) + ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID); + len -= 96; + rdev_p->ctrl_qp.wptr++; + } + return 0; +} + +/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr + * OUT: stag index + * TBD: shared memory region support + */ +static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, + u32 *stag, u8 stag_state, u32 pdid, + enum tpt_mem_type type, enum tpt_mem_perm perm, + u32 zbva, u64 to, u32 len, u8 page_size, + u32 pbl_size, u32 pbl_addr) +{ + int err; + struct tpt_entry tpt; + u32 stag_idx; + u32 wptr; + + if (cxio_fatal_error(rdev_p)) + return -EIO; + + stag_state = stag_state > 0; + stag_idx = (*stag) >> 8; + + if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) { + stag_idx = cxio_hal_get_stag(rdev_p->rscp); + if (!stag_idx) + return -ENOMEM; + *stag = (stag_idx << 8) | ((*stag) & 0xFF); + } + PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", + __func__, stag_state, type, pdid, stag_idx); + + mutex_lock(&rdev_p->ctrl_qp.lock); + + /* write TPT entry */ + if (reset_tpt_entry) + memset(&tpt, 0, sizeof(tpt)); + else { + tpt.valid_stag_pdid = cpu_to_be32(F_TPT_VALID | + V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) | + V_TPT_STAG_STATE(stag_state) | + V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid)); + BUG_ON(page_size >= 28); + tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) | + ((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) | + V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | + V_TPT_PAGE_SIZE(page_size)); + tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 : + cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3)); + tpt.len = cpu_to_be32(len); + tpt.va_hi = cpu_to_be32((u32) (to >> 32)); + tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL)); + tpt.rsvd_bind_cnt_or_pstag = 0; + tpt.rsvd_pbl_size = reset_tpt_entry ? 0 : + cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2)); + } + err = cxio_hal_ctrl_qp_write_mem(rdev_p, + stag_idx + + (rdev_p->rnic_info.tpt_base >> 5), + sizeof(tpt), &tpt); + + /* release the stag index to free pool */ + if (reset_tpt_entry) + cxio_hal_put_stag(rdev_p->rscp, stag_idx); + + wptr = rdev_p->ctrl_qp.wptr; + mutex_unlock(&rdev_p->ctrl_qp.lock); + if (!err) + if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, + SEQ32_GE(rdev_p->ctrl_qp.rptr, + wptr))) + return -ERESTARTSYS; + return err; +} + +int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, + u32 pbl_addr, u32 pbl_size) +{ + u32 wptr; + int err; + + PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", + __func__, pbl_addr, rdev_p->rnic_info.pbl_base, + pbl_size); + + mutex_lock(&rdev_p->ctrl_qp.lock); + err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3, + pbl); + wptr = rdev_p->ctrl_qp.wptr; + mutex_unlock(&rdev_p->ctrl_qp.lock); + if (err) + return err; + + if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, + SEQ32_GE(rdev_p->ctrl_qp.rptr, + wptr))) + return -ERESTARTSYS; + + return 0; +} + +int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, + enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, + u8 page_size, u32 pbl_size, u32 pbl_addr) +{ + *stag = T3_STAG_UNSET; + return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, + zbva, to, len, page_size, pbl_size, pbl_addr); +} + +int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, + enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, + u8 page_size, u32 pbl_size, u32 pbl_addr) +{ + return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, + zbva, to, len, page_size, pbl_size, pbl_addr); +} + +int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size, + u32 pbl_addr) +{ + return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, + pbl_size, pbl_addr); +} + +int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid) +{ + *stag = T3_STAG_UNSET; + return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0, + 0, 0); +} + +int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag) +{ + return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, + 0, 0); +} + +int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr) +{ + *stag = T3_STAG_UNSET; + return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR, + 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr); +} + +int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) +{ + struct t3_rdma_init_wr *wqe; + struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC); + if (!skb) + return -ENOMEM; + PDBG("%s rdev_p %p\n", __func__, rdev_p); + wqe = (struct t3_rdma_init_wr *) __skb_put(skb, sizeof(*wqe)); + wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT)); + wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) | + V_FW_RIWR_LEN(sizeof(*wqe) >> 3)); + wqe->wrid.id1 = 0; + wqe->qpid = cpu_to_be32(attr->qpid); + wqe->pdid = cpu_to_be32(attr->pdid); + wqe->scqid = cpu_to_be32(attr->scqid); + wqe->rcqid = cpu_to_be32(attr->rcqid); + wqe->rq_addr = cpu_to_be32(attr->rq_addr - rdev_p->rnic_info.rqt_base); + wqe->rq_size = cpu_to_be32(attr->rq_size); + wqe->mpaattrs = attr->mpaattrs; + wqe->qpcaps = attr->qpcaps; + wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss); + wqe->rqe_count = cpu_to_be16(attr->rqe_count); + wqe->flags_rtr_type = cpu_to_be16(attr->flags | + V_RTR_TYPE(attr->rtr_type) | + V_CHAN(attr->chan)); + wqe->ord = cpu_to_be32(attr->ord); + wqe->ird = cpu_to_be32(attr->ird); + wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr); + wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size); + wqe->irs = cpu_to_be32(attr->irs); + skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */ + return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); +} + +void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb) +{ + cxio_ev_cb = ev_cb; +} + +void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb) +{ + cxio_ev_cb = NULL; +} + +static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb) +{ + static int cnt; + struct cxio_rdev *rdev_p = NULL; + struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data; + PDBG("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x" + " se %0x notify %0x cqbranch %0x creditth %0x\n", + cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg), + RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg), + RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg), + RSPQ_CREDIT_THRESH(rsp_msg)); + PDBG("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d " + "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", + CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe), + CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), + CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe), + CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); + rdev_p = (struct cxio_rdev *)t3cdev_p->ulp; + if (!rdev_p) { + PDBG("%s called by t3cdev %p with null ulp\n", __func__, + t3cdev_p); + return 0; + } + if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) { + rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1; + wake_up_interruptible(&rdev_p->ctrl_qp.waitq); + dev_kfree_skb_irq(skb); + } else if (CQE_QPID(rsp_msg->cqe) == 0xfff8) + dev_kfree_skb_irq(skb); + else if (cxio_ev_cb) + (*cxio_ev_cb) (rdev_p, skb); + else + dev_kfree_skb_irq(skb); + cnt++; + return 0; +} + +/* Caller takes care of locking if needed */ +int cxio_rdev_open(struct cxio_rdev *rdev_p) +{ + struct net_device *netdev_p = NULL; + int err = 0; + if (strlen(rdev_p->dev_name)) { + if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) { + return -EBUSY; + } + netdev_p = dev_get_by_name(&init_net, rdev_p->dev_name); + if (!netdev_p) { + return -EINVAL; + } + dev_put(netdev_p); + } else if (rdev_p->t3cdev_p) { + if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) { + return -EBUSY; + } + netdev_p = rdev_p->t3cdev_p->lldev; + strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name, + T3_MAX_DEV_NAME_LEN); + } else { + PDBG("%s t3cdev_p or dev_name must be set\n", __func__); + return -EINVAL; + } + + list_add_tail(&rdev_p->entry, &rdev_list); + + PDBG("%s opening rnic dev %s\n", __func__, rdev_p->dev_name); + memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp)); + if (!rdev_p->t3cdev_p) + rdev_p->t3cdev_p = dev2t3cdev(netdev_p); + rdev_p->t3cdev_p->ulp = (void *) rdev_p; + + err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO, + &(rdev_p->fw_info)); + if (err) { + printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n", + __func__, rdev_p->t3cdev_p, err); + goto err1; + } + if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) { + printk(KERN_ERR MOD "fatal firmware version mismatch: " + "need version %u but adapter has version %u\n", + CXIO_FW_MAJ, + G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers)); + err = -EINVAL; + goto err1; + } + + err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS, + &(rdev_p->rnic_info)); + if (err) { + printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n", + __func__, rdev_p->t3cdev_p, err); + goto err1; + } + err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS, + &(rdev_p->port_info)); + if (err) { + printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n", + __func__, rdev_p->t3cdev_p, err); + goto err1; + } + + /* + * qpshift is the number of bits to shift the qpid left in order + * to get the correct address of the doorbell for that qp. + */ + cxio_init_ucontext(rdev_p, &rdev_p->uctx); + rdev_p->qpshift = PAGE_SHIFT - + ilog2(65536 >> + ilog2(rdev_p->rnic_info.udbell_len >> + PAGE_SHIFT)); + rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT; + rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1; + PDBG("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d " + "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n", + __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base, + rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p), + rdev_p->rnic_info.pbl_base, + rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base, + rdev_p->rnic_info.rqt_top); + PDBG("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu " + "qpnr %d qpmask 0x%x\n", + rdev_p->rnic_info.udbell_len, + rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr, + rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask); + + err = cxio_hal_init_ctrl_qp(rdev_p); + if (err) { + printk(KERN_ERR "%s error %d initializing ctrl_qp.\n", + __func__, err); + goto err1; + } + err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0, + 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ, + T3_MAX_NUM_PD); + if (err) { + printk(KERN_ERR "%s error %d initializing hal resources.\n", + __func__, err); + goto err2; + } + err = cxio_hal_pblpool_create(rdev_p); + if (err) { + printk(KERN_ERR "%s error %d initializing pbl mem pool.\n", + __func__, err); + goto err3; + } + err = cxio_hal_rqtpool_create(rdev_p); + if (err) { + printk(KERN_ERR "%s error %d initializing rqt mem pool.\n", + __func__, err); + goto err4; + } + return 0; +err4: + cxio_hal_pblpool_destroy(rdev_p); +err3: + cxio_hal_destroy_resource(rdev_p->rscp); +err2: + cxio_hal_destroy_ctrl_qp(rdev_p); +err1: + rdev_p->t3cdev_p->ulp = NULL; + list_del(&rdev_p->entry); + return err; +} + +void cxio_rdev_close(struct cxio_rdev *rdev_p) +{ + if (rdev_p) { + cxio_hal_pblpool_destroy(rdev_p); + cxio_hal_rqtpool_destroy(rdev_p); + list_del(&rdev_p->entry); + cxio_hal_destroy_ctrl_qp(rdev_p); + cxio_hal_destroy_resource(rdev_p->rscp); + rdev_p->t3cdev_p->ulp = NULL; + } +} + +int __init cxio_hal_init(void) +{ + if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI)) + return -ENOMEM; + t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler); + return 0; +} + +void __exit cxio_hal_exit(void) +{ + struct cxio_rdev *rdev, *tmp; + + t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL); + list_for_each_entry_safe(rdev, tmp, &rdev_list, entry) + cxio_rdev_close(rdev); + cxio_hal_destroy_rhdl_resource(); +} + +static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq) +{ + struct t3_swsq *sqp; + __u32 ptr = wq->sq_rptr; + int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr); + + sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); + while (count--) + if (!sqp->signaled) { + ptr++; + sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); + } else if (sqp->complete) { + + /* + * Insert this completed cqe into the swcq. + */ + PDBG("%s moving cqe into swcq sq idx %ld cq idx %ld\n", + __func__, Q_PTR2IDX(ptr, wq->sq_size_log2), + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)); + sqp->cqe.header |= htonl(V_CQE_SWCQE(1)); + *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) + = sqp->cqe; + cq->sw_wptr++; + sqp->signaled = 0; + break; + } else + break; +} + +static void create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe, + struct t3_cqe *read_cqe) +{ + read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr; + read_cqe->len = wq->oldest_read->read_len; + read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) | + V_CQE_SWCQE(SW_CQE(*hw_cqe)) | + V_CQE_OPCODE(T3_READ_REQ) | + V_CQE_TYPE(1)); +} + +/* + * Return a ptr to the next read wr in the SWSQ or NULL. + */ +static void advance_oldest_read(struct t3_wq *wq) +{ + + u32 rptr = wq->oldest_read - wq->sq + 1; + u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2); + + while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) { + wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2); + + if (wq->oldest_read->opcode == T3_READ_REQ) + return; + rptr++; + } + wq->oldest_read = NULL; +} + +/* + * cxio_poll_cq + * + * Caller must: + * check the validity of the first CQE, + * supply the wq assicated with the qpid. + * + * credit: cq credit to return to sge. + * cqe_flushed: 1 iff the CQE is flushed. + * cqe: copy of the polled CQE. + * + * return value: + * 0 CQE returned, + * -1 CQE skipped, try again. + */ +int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, + u8 *cqe_flushed, u64 *cookie, u32 *credit) +{ + int ret = 0; + struct t3_cqe *hw_cqe, read_cqe; + + *cqe_flushed = 0; + *credit = 0; + hw_cqe = cxio_next_cqe(cq); + + PDBG("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x" + " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", + __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe), + CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe), + CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe), + CQE_WRID_LOW(*hw_cqe)); + + /* + * skip cqe's not affiliated with a QP. + */ + if (wq == NULL) { + ret = -1; + goto skip_cqe; + } + + /* + * Gotta tweak READ completions: + * 1) the cqe doesn't contain the sq_wptr from the wr. + * 2) opcode not reflected from the wr. + * 3) read_len not reflected from the wr. + * 4) cq_type is RQ_TYPE not SQ_TYPE. + */ + if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) { + + /* + * If this is an unsolicited read response, then the read + * was generated by the kernel driver as part of peer-2-peer + * connection setup. So ignore the completion. + */ + if (!wq->oldest_read) { + if (CQE_STATUS(*hw_cqe)) + wq->error = 1; + ret = -1; + goto skip_cqe; + } + + /* + * Don't write to the HWCQ, so create a new read req CQE + * in local memory. + */ + create_read_req_cqe(wq, hw_cqe, &read_cqe); + hw_cqe = &read_cqe; + advance_oldest_read(wq); + } + + /* + * T3A: Discard TERMINATE CQEs. + */ + if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) { + ret = -1; + wq->error = 1; + goto skip_cqe; + } + + if (CQE_STATUS(*hw_cqe) || wq->error) { + *cqe_flushed = wq->error; + wq->error = 1; + + /* + * T3A inserts errors into the CQE. We cannot return + * these as work completions. + */ + /* incoming write failures */ + if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE) + && RQ_TYPE(*hw_cqe)) { + ret = -1; + goto skip_cqe; + } + /* incoming read request failures */ + if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) { + ret = -1; + goto skip_cqe; + } + + /* incoming SEND with no receive posted failures */ + if (CQE_SEND_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) && + Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { + ret = -1; + goto skip_cqe; + } + BUG_ON((*cqe_flushed == 0) && !SW_CQE(*hw_cqe)); + goto proc_cqe; + } + + /* + * RECV completion. + */ + if (RQ_TYPE(*hw_cqe)) { + + /* + * HW only validates 4 bits of MSN. So we must validate that + * the MSN in the SEND is the next expected MSN. If its not, + * then we complete this with TPT_ERR_MSN and mark the wq in + * error. + */ + + if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { + wq->error = 1; + ret = -1; + goto skip_cqe; + } + + if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) { + wq->error = 1; + hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN)); + goto proc_cqe; + } + goto proc_cqe; + } + + /* + * If we get here its a send completion. + * + * Handle out of order completion. These get stuffed + * in the SW SQ. Then the SW SQ is walked to move any + * now in-order completions into the SW CQ. This handles + * 2 cases: + * 1) reaping unsignaled WRs when the first subsequent + * signaled WR is completed. + * 2) out of order read completions. + */ + if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) { + struct t3_swsq *sqp; + + PDBG("%s out of order completion going in swsq at idx %ld\n", + __func__, + Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2)); + sqp = wq->sq + + Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2); + sqp->cqe = *hw_cqe; + sqp->complete = 1; + ret = -1; + goto flush_wq; + } + +proc_cqe: + *cqe = *hw_cqe; + + /* + * Reap the associated WR(s) that are freed up with this + * completion. + */ + if (SQ_TYPE(*hw_cqe)) { + wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe); + PDBG("%s completing sq idx %ld\n", __func__, + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)); + *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id; + wq->sq_rptr++; + } else { + PDBG("%s completing rq idx %ld\n", __func__, + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); + *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id; + if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr) + cxio_hal_pblpool_free(wq->rdev, + wq->rq[Q_PTR2IDX(wq->rq_rptr, + wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE); + BUG_ON(Q_EMPTY(wq->rq_rptr, wq->rq_wptr)); + wq->rq_rptr++; + } + +flush_wq: + /* + * Flush any completed cqes that are now in-order. + */ + flush_completed_wrs(wq, cq); + +skip_cqe: + if (SW_CQE(*hw_cqe)) { + PDBG("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n", + __func__, cq, cq->cqid, cq->sw_rptr); + ++cq->sw_rptr; + } else { + PDBG("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n", + __func__, cq, cq->cqid, cq->rptr); + ++cq->rptr; + + /* + * T3A: compute credits. + */ + if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1))) + || ((cq->rptr - cq->wptr) >= 128)) { + *credit = cq->rptr - cq->wptr; + cq->wptr = cq->rptr; + } + } + return ret; +} diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h new file mode 100644 index 00000000..78fbe9ff --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __CXIO_HAL_H__ +#define __CXIO_HAL_H__ + +#include +#include +#include + +#include "t3_cpl.h" +#include "t3cdev.h" +#include "cxgb3_ctl_defs.h" +#include "cxio_wr.h" + +#define T3_CTRL_QP_ID FW_RI_SGEEC_START +#define T3_CTL_QP_TID FW_RI_TID_START +#define T3_CTRL_QP_SIZE_LOG2 8 +#define T3_CTRL_CQ_ID 0 + +#define T3_MAX_NUM_RI (1<<15) +#define T3_MAX_NUM_QP (1<<15) +#define T3_MAX_NUM_CQ (1<<15) +#define T3_MAX_NUM_PD (1<<15) +#define T3_MAX_PBL_SIZE 256 +#define T3_MAX_RQ_SIZE 1024 +#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1) +#define T3_MAX_CQ_DEPTH 65536 +#define T3_MAX_NUM_STAG (1<<15) +#define T3_MAX_MR_SIZE 0x100000000ULL +#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ + +#define T3_STAG_UNSET 0xffffffff + +#define T3_MAX_DEV_NAME_LEN 32 + +#define CXIO_FW_MAJ 7 + +struct cxio_hal_ctrl_qp { + u32 wptr; + u32 rptr; + struct mutex lock; /* for the wtpr, can sleep */ + wait_queue_head_t waitq;/* wait for RspQ/CQE msg */ + union t3_wr *workq; /* the work request queue */ + dma_addr_t dma_addr; /* pci bus address of the workq */ + DEFINE_DMA_UNMAP_ADDR(mapping); + void __iomem *doorbell; +}; + +struct cxio_hal_resource { + struct kfifo tpt_fifo; + spinlock_t tpt_fifo_lock; + struct kfifo qpid_fifo; + spinlock_t qpid_fifo_lock; + struct kfifo cqid_fifo; + spinlock_t cqid_fifo_lock; + struct kfifo pdid_fifo; + spinlock_t pdid_fifo_lock; +}; + +struct cxio_qpid_list { + struct list_head entry; + u32 qpid; +}; + +struct cxio_ucontext { + struct list_head qpids; + struct mutex lock; +}; + +struct cxio_rdev { + char dev_name[T3_MAX_DEV_NAME_LEN]; + struct t3cdev *t3cdev_p; + struct rdma_info rnic_info; + struct adap_ports port_info; + struct cxio_hal_resource *rscp; + struct cxio_hal_ctrl_qp ctrl_qp; + void *ulp; + unsigned long qpshift; + u32 qpnr; + u32 qpmask; + struct cxio_ucontext uctx; + struct gen_pool *pbl_pool; + struct gen_pool *rqt_pool; + struct list_head entry; + struct ch_embedded_info fw_info; + u32 flags; +#define CXIO_ERROR_FATAL 1 +}; + +static inline int cxio_fatal_error(struct cxio_rdev *rdev_p) +{ + return rdev_p->flags & CXIO_ERROR_FATAL; +} + +static inline int cxio_num_stags(struct cxio_rdev *rdev_p) +{ + return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5)); +} + +typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p, + struct sk_buff * skb); + +#define RSPQ_CQID(rsp) (be32_to_cpu(rsp->cq_ptrid) & 0xffff) +#define RSPQ_CQPTR(rsp) ((be32_to_cpu(rsp->cq_ptrid) >> 16) & 0xffff) +#define RSPQ_GENBIT(rsp) ((be32_to_cpu(rsp->flags) >> 16) & 1) +#define RSPQ_OVERFLOW(rsp) ((be32_to_cpu(rsp->flags) >> 17) & 1) +#define RSPQ_AN(rsp) ((be32_to_cpu(rsp->flags) >> 18) & 1) +#define RSPQ_SE(rsp) ((be32_to_cpu(rsp->flags) >> 19) & 1) +#define RSPQ_NOTIFY(rsp) ((be32_to_cpu(rsp->flags) >> 20) & 1) +#define RSPQ_CQBRANCH(rsp) ((be32_to_cpu(rsp->flags) >> 21) & 1) +#define RSPQ_CREDIT_THRESH(rsp) ((be32_to_cpu(rsp->flags) >> 22) & 1) + +struct respQ_msg_t { + __be32 flags; /* flit 0 */ + __be32 cq_ptrid; + __be64 rsvd; /* flit 1 */ + struct t3_cqe cqe; /* flits 2-3 */ +}; + +enum t3_cq_opcode { + CQ_ARM_AN = 0x2, + CQ_ARM_SE = 0x6, + CQ_FORCE_AN = 0x3, + CQ_CREDIT_UPDATE = 0x7 +}; + +int cxio_rdev_open(struct cxio_rdev *rdev); +void cxio_rdev_close(struct cxio_rdev *rdev); +int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq, + enum t3_cq_opcode op, u32 credit); +int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel); +int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq); +int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq); +void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); +void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); +int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq, + struct cxio_ucontext *uctx); +int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq, + struct cxio_ucontext *uctx); +int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode); +int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, + u32 pbl_addr, u32 pbl_size); +int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, + enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, + u8 page_size, u32 pbl_size, u32 pbl_addr); +int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, + enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, + u8 page_size, u32 pbl_size, u32 pbl_addr); +int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size, + u32 pbl_addr); +int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid); +int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr); +int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag); +int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr); +void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb); +void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb); +u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp); +void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid); +int __init cxio_hal_init(void); +void __exit cxio_hal_exit(void); +int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); +int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); +void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count); +void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count); +void cxio_flush_hw_cq(struct t3_cq *cq); +int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, + u8 *cqe_flushed, u64 *cookie, u32 *credit); +int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb); + +#define MOD "iw_cxgb3: " +#define PDBG(fmt, args...) pr_debug(MOD fmt, ## args) + +#ifdef DEBUG +void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag); +void cxio_dump_pbl(struct cxio_rdev *rev, u32 pbl_addr, uint len, u8 shift); +void cxio_dump_wqe(union t3_wr *wqe); +void cxio_dump_wce(struct t3_cqe *wce); +void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents); +void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid); +#endif + +#endif diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c new file mode 100644 index 00000000..31f9201b --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +/* Crude resource management */ +#include +#include +#include +#include +#include +#include +#include "cxio_resource.h" +#include "cxio_hal.h" + +static struct kfifo rhdl_fifo; +static spinlock_t rhdl_fifo_lock; + +#define RANDOM_SIZE 16 + +static int __cxio_init_resource_fifo(struct kfifo *fifo, + spinlock_t *fifo_lock, + u32 nr, u32 skip_low, + u32 skip_high, + int random) +{ + u32 i, j, entry = 0, idx; + u32 random_bytes; + u32 rarray[16]; + spin_lock_init(fifo_lock); + + if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL)) + return -ENOMEM; + + for (i = 0; i < skip_low + skip_high; i++) + kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32)); + if (random) { + j = 0; + random_bytes = random32(); + for (i = 0; i < RANDOM_SIZE; i++) + rarray[i] = i + skip_low; + for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) { + if (j >= RANDOM_SIZE) { + j = 0; + random_bytes = random32(); + } + idx = (random_bytes >> (j * 2)) & 0xF; + kfifo_in(fifo, + (unsigned char *) &rarray[idx], + sizeof(u32)); + rarray[idx] = i; + j++; + } + for (i = 0; i < RANDOM_SIZE; i++) + kfifo_in(fifo, + (unsigned char *) &rarray[i], + sizeof(u32)); + } else + for (i = skip_low; i < nr - skip_high; i++) + kfifo_in(fifo, (unsigned char *) &i, sizeof(u32)); + + for (i = 0; i < skip_low + skip_high; i++) + if (kfifo_out_locked(fifo, (unsigned char *) &entry, + sizeof(u32), fifo_lock) != sizeof(u32)) + break; + return 0; +} + +static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock, + u32 nr, u32 skip_low, u32 skip_high) +{ + return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, + skip_high, 0)); +} + +static int cxio_init_resource_fifo_random(struct kfifo *fifo, + spinlock_t * fifo_lock, + u32 nr, u32 skip_low, u32 skip_high) +{ + + return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, + skip_high, 1)); +} + +static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) +{ + u32 i; + + spin_lock_init(&rdev_p->rscp->qpid_fifo_lock); + + if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32), + GFP_KERNEL)) + return -ENOMEM; + + for (i = 16; i < T3_MAX_NUM_QP; i++) + if (!(i & rdev_p->qpmask)) + kfifo_in(&rdev_p->rscp->qpid_fifo, + (unsigned char *) &i, sizeof(u32)); + return 0; +} + +int cxio_hal_init_rhdl_resource(u32 nr_rhdl) +{ + return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1, + 0); +} + +void cxio_hal_destroy_rhdl_resource(void) +{ + kfifo_free(&rhdl_fifo); +} + +/* nr_* must be power of 2 */ +int cxio_hal_init_resource(struct cxio_rdev *rdev_p, + u32 nr_tpt, u32 nr_pbl, + u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid) +{ + int err = 0; + struct cxio_hal_resource *rscp; + + rscp = kmalloc(sizeof(*rscp), GFP_KERNEL); + if (!rscp) + return -ENOMEM; + rdev_p->rscp = rscp; + err = cxio_init_resource_fifo_random(&rscp->tpt_fifo, + &rscp->tpt_fifo_lock, + nr_tpt, 1, 0); + if (err) + goto tpt_err; + err = cxio_init_qpid_fifo(rdev_p); + if (err) + goto qpid_err; + err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, + nr_cqid, 1, 0); + if (err) + goto cqid_err; + err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, + nr_pdid, 1, 0); + if (err) + goto pdid_err; + return 0; +pdid_err: + kfifo_free(&rscp->cqid_fifo); +cqid_err: + kfifo_free(&rscp->qpid_fifo); +qpid_err: + kfifo_free(&rscp->tpt_fifo); +tpt_err: + return -ENOMEM; +} + +/* + * returns 0 if no resource available + */ +static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock) +{ + u32 entry; + if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) + return entry; + else + return 0; /* fifo emptry */ +} + +static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock, + u32 entry) +{ + BUG_ON( + kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock) + == 0); +} + +u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp) +{ + return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock); +} + +void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag) +{ + cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag); +} + +u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) +{ + u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo, + &rscp->qpid_fifo_lock); + PDBG("%s qpid 0x%x\n", __func__, qpid); + return qpid; +} + +void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid) +{ + PDBG("%s qpid 0x%x\n", __func__, qpid); + cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid); +} + +u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp) +{ + return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock); +} + +void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid) +{ + cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid); +} + +u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp) +{ + return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock); +} + +void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid) +{ + cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid); +} + +void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) +{ + kfifo_free(&rscp->tpt_fifo); + kfifo_free(&rscp->cqid_fifo); + kfifo_free(&rscp->qpid_fifo); + kfifo_free(&rscp->pdid_fifo); + kfree(rscp); +} + +/* + * PBL Memory Manager. Uses Linux generic allocator. + */ + +#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */ + +u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size) +{ + unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size); + PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); + return (u32)addr; +} + +void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) +{ + PDBG("%s addr 0x%x size %d\n", __func__, addr, size); + gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size); +} + +int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p) +{ + unsigned pbl_start, pbl_chunk; + + rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1); + if (!rdev_p->pbl_pool) + return -ENOMEM; + + pbl_start = rdev_p->rnic_info.pbl_base; + pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1; + + while (pbl_start < rdev_p->rnic_info.pbl_top) { + pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1, + pbl_chunk); + if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) { + PDBG("%s failed to add PBL chunk (%x/%x)\n", + __func__, pbl_start, pbl_chunk); + if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) { + printk(KERN_WARNING MOD "%s: Failed to add all PBL chunks (%x/%x)\n", + __func__, pbl_start, rdev_p->rnic_info.pbl_top - pbl_start); + return 0; + } + pbl_chunk >>= 1; + } else { + PDBG("%s added PBL chunk (%x/%x)\n", + __func__, pbl_start, pbl_chunk); + pbl_start += pbl_chunk; + } + } + + return 0; +} + +void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p) +{ + gen_pool_destroy(rdev_p->pbl_pool); +} + +/* + * RQT Memory Manager. Uses Linux generic allocator. + */ + +#define MIN_RQT_SHIFT 10 /* 1KB == mini RQT size (16 entries) */ +#define RQT_CHUNK 2*1024*1024 + +u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size) +{ + unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6); + PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6); + return (u32)addr; +} + +void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) +{ + PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6); + gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6); +} + +int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p) +{ + unsigned long i; + rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); + if (rdev_p->rqt_pool) + for (i = rdev_p->rnic_info.rqt_base; + i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1; + i += RQT_CHUNK) + gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1); + return rdev_p->rqt_pool ? 0 : -ENOMEM; +} + +void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p) +{ + gen_pool_destroy(rdev_p->rqt_pool); +} diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h new file mode 100644 index 00000000..a2703a3d --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __CXIO_RESOURCE_H__ +#define __CXIO_RESOURCE_H__ + +#include +#include +#include +#include +#include +#include +#include +#include "cxio_hal.h" + +extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl); +extern void cxio_hal_destroy_rhdl_resource(void); +extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p, + u32 nr_tpt, u32 nr_pbl, + u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, + u32 nr_pdid); +extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp); +extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag); +extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp); +extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid); +extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp); +extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid); +extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp); + +#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base ) +extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p); +extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p); +extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size); +extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size); + +#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base ) +extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p); +extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p); +extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size); +extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size); +#endif diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h new file mode 100644 index 00000000..83d2e19d --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -0,0 +1,802 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __CXIO_WR_H__ +#define __CXIO_WR_H__ + +#include +#include +#include +#include "firmware_exports.h" + +#define T3_MAX_SGE 4 +#define T3_MAX_INLINE 64 +#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3) +#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024) +#define T3_STAG0_PAGE_SHIFT 15 + +#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) +#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ + ((rptr)!=(wptr)) ) +#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1)) +#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<> S_FW_RIWR_OP)) & M_FW_RIWR_OP) + +#define S_FW_RIWR_SOPEOP 22 +#define M_FW_RIWR_SOPEOP 0x3 +#define V_FW_RIWR_SOPEOP(x) ((x) << S_FW_RIWR_SOPEOP) + +#define S_FW_RIWR_FLAGS 8 +#define M_FW_RIWR_FLAGS 0x3fffff +#define V_FW_RIWR_FLAGS(x) ((x) << S_FW_RIWR_FLAGS) +#define G_FW_RIWR_FLAGS(x) ((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS) + +#define S_FW_RIWR_TID 8 +#define V_FW_RIWR_TID(x) ((x) << S_FW_RIWR_TID) + +#define S_FW_RIWR_LEN 0 +#define V_FW_RIWR_LEN(x) ((x) << S_FW_RIWR_LEN) + +#define S_FW_RIWR_GEN 31 +#define V_FW_RIWR_GEN(x) ((x) << S_FW_RIWR_GEN) + +struct t3_sge { + __be32 stag; + __be32 len; + __be64 to; +}; + +/* If num_sgle is zero, flit 5+ contains immediate data.*/ +struct t3_send_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + + u8 rdmaop; /* 2 */ + u8 reserved[3]; + __be32 rem_stag; + __be32 plen; /* 3 */ + __be32 num_sgle; + struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */ +}; + +#define T3_MAX_FASTREG_DEPTH 10 +#define T3_MAX_FASTREG_FRAG 10 + +struct t3_fastreg_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + __be32 stag; /* 2 */ + __be32 len; + __be32 va_base_hi; /* 3 */ + __be32 va_base_lo_fbo; + __be32 page_type_perms; /* 4 */ + __be32 reserved1; + __be64 pbl_addrs[0]; /* 5+ */ +}; + +/* + * If a fastreg wr spans multiple wqes, then the 2nd fragment look like this. + */ +struct t3_pbl_frag { + struct fw_riwrh wrh; /* 0 */ + __be64 pbl_addrs[14]; /* 1..14 */ +}; + +#define S_FR_PAGE_COUNT 24 +#define M_FR_PAGE_COUNT 0xff +#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT) +#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT) + +#define S_FR_PAGE_SIZE 16 +#define M_FR_PAGE_SIZE 0x1f +#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE) +#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE) + +#define S_FR_TYPE 8 +#define M_FR_TYPE 0x1 +#define V_FR_TYPE(x) ((x) << S_FR_TYPE) +#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE) + +#define S_FR_PERMS 0 +#define M_FR_PERMS 0xff +#define V_FR_PERMS(x) ((x) << S_FR_PERMS) +#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS) + +struct t3_local_inv_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + __be32 stag; /* 2 */ + __be32 reserved; +}; + +struct t3_rdma_write_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + u8 rdmaop; /* 2 */ + u8 reserved[3]; + __be32 stag_sink; + __be64 to_sink; /* 3 */ + __be32 plen; /* 4 */ + __be32 num_sgle; + struct t3_sge sgl[T3_MAX_SGE]; /* 5+ */ +}; + +struct t3_rdma_read_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + u8 rdmaop; /* 2 */ + u8 local_inv; + u8 reserved[2]; + __be32 rem_stag; + __be64 rem_to; /* 3 */ + __be32 local_stag; /* 4 */ + __be32 local_len; + __be64 local_to; /* 5 */ +}; + +struct t3_bind_mw_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + u16 reserved; /* 2 */ + u8 type; + u8 perms; + __be32 mr_stag; + __be32 mw_stag; /* 3 */ + __be32 mw_len; + __be64 mw_va; /* 4 */ + __be32 mr_pbl_addr; /* 5 */ + u8 reserved2[3]; + u8 mr_pagesz; +}; + +struct t3_receive_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + u8 pagesz[T3_MAX_SGE]; + __be32 num_sgle; /* 2 */ + struct t3_sge sgl[T3_MAX_SGE]; /* 3+ */ + __be32 pbl_addr[T3_MAX_SGE]; +}; + +struct t3_bypass_wr { + struct fw_riwrh wrh; + union t3_wrid wrid; /* 1 */ +}; + +struct t3_modify_qp_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + __be32 flags; /* 2 */ + __be32 quiesce; /* 2 */ + __be32 max_ird; /* 3 */ + __be32 max_ord; /* 3 */ + __be64 sge_cmd; /* 4 */ + __be64 ctx1; /* 5 */ + __be64 ctx0; /* 6 */ +}; + +enum t3_modify_qp_flags { + MODQP_QUIESCE = 0x01, + MODQP_MAX_IRD = 0x02, + MODQP_MAX_ORD = 0x04, + MODQP_WRITE_EC = 0x08, + MODQP_READ_EC = 0x10, +}; + + +enum t3_mpa_attrs { + uP_RI_MPA_RX_MARKER_ENABLE = 0x1, + uP_RI_MPA_TX_MARKER_ENABLE = 0x2, + uP_RI_MPA_CRC_ENABLE = 0x4, + uP_RI_MPA_IETF_ENABLE = 0x8 +} __attribute__ ((packed)); + +enum t3_qp_caps { + uP_RI_QP_RDMA_READ_ENABLE = 0x01, + uP_RI_QP_RDMA_WRITE_ENABLE = 0x02, + uP_RI_QP_BIND_ENABLE = 0x04, + uP_RI_QP_FAST_REGISTER_ENABLE = 0x08, + uP_RI_QP_STAG0_ENABLE = 0x10 +} __attribute__ ((packed)); + +enum rdma_init_rtr_types { + RTR_READ = 1, + RTR_WRITE = 2, + RTR_SEND = 3, +}; + +#define S_RTR_TYPE 2 +#define M_RTR_TYPE 0x3 +#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE) +#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE) + +#define S_CHAN 4 +#define M_CHAN 0x3 +#define V_CHAN(x) ((x) << S_CHAN) +#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN) + +struct t3_rdma_init_attr { + u32 tid; + u32 qpid; + u32 pdid; + u32 scqid; + u32 rcqid; + u32 rq_addr; + u32 rq_size; + enum t3_mpa_attrs mpaattrs; + enum t3_qp_caps qpcaps; + u16 tcp_emss; + u32 ord; + u32 ird; + u64 qp_dma_addr; + u32 qp_dma_size; + enum rdma_init_rtr_types rtr_type; + u16 flags; + u16 rqe_count; + u32 irs; + u32 chan; +}; + +struct t3_rdma_init_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + __be32 qpid; /* 2 */ + __be32 pdid; + __be32 scqid; /* 3 */ + __be32 rcqid; + __be32 rq_addr; /* 4 */ + __be32 rq_size; + u8 mpaattrs; /* 5 */ + u8 qpcaps; + __be16 ulpdu_size; + __be16 flags_rtr_type; + __be16 rqe_count; + __be32 ord; /* 6 */ + __be32 ird; + __be64 qp_dma_addr; /* 7 */ + __be32 qp_dma_size; /* 8 */ + __be32 irs; +}; + +struct t3_genbit { + u64 flit[15]; + __be64 genbit; +}; + +struct t3_wq_in_err { + u64 flit[13]; + u64 err; +}; + +enum rdma_init_wr_flags { + MPA_INITIATOR = (1<<0), + PRIV_QP = (1<<1), +}; + +union t3_wr { + struct t3_send_wr send; + struct t3_rdma_write_wr write; + struct t3_rdma_read_wr read; + struct t3_receive_wr recv; + struct t3_fastreg_wr fastreg; + struct t3_pbl_frag pbl_frag; + struct t3_local_inv_wr local_inv; + struct t3_bind_mw_wr bind; + struct t3_bypass_wr bypass; + struct t3_rdma_init_wr init; + struct t3_modify_qp_wr qp_mod; + struct t3_genbit genbit; + struct t3_wq_in_err wq_in_err; + __be64 flit[16]; +}; + +#define T3_SQ_CQE_FLIT 13 +#define T3_SQ_COOKIE_FLIT 14 + +#define T3_RQ_COOKIE_FLIT 13 +#define T3_RQ_CQE_FLIT 14 + +static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe) +{ + return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags)); +} + +enum t3_wr_hdr_bits { + T3_EOP = 1, + T3_SOP = 2, + T3_SOPEOP = T3_EOP|T3_SOP, +}; + +static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op, + enum t3_wr_flags flags, u8 genbit, u32 tid, + u8 len, u8 sopeop) +{ + wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) | + V_FW_RIWR_SOPEOP(sopeop) | + V_FW_RIWR_FLAGS(flags)); + wmb(); + wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) | + V_FW_RIWR_TID(tid) | + V_FW_RIWR_LEN(len)); + /* 2nd gen bit... */ + ((union t3_wr *)wqe)->genbit.genbit = cpu_to_be64(genbit); +} + +/* + * T3 ULP2_TX commands + */ +enum t3_utx_mem_op { + T3_UTX_MEM_READ = 2, + T3_UTX_MEM_WRITE = 3 +}; + +/* T3 MC7 RDMA TPT entry format */ + +enum tpt_mem_type { + TPT_NON_SHARED_MR = 0x0, + TPT_SHARED_MR = 0x1, + TPT_MW = 0x2, + TPT_MW_RELAXED_PROTECTION = 0x3 +}; + +enum tpt_addr_type { + TPT_ZBTO = 0, + TPT_VATO = 1 +}; + +enum tpt_mem_perm { + TPT_MW_BIND = 0x10, + TPT_LOCAL_READ = 0x8, + TPT_LOCAL_WRITE = 0x4, + TPT_REMOTE_READ = 0x2, + TPT_REMOTE_WRITE = 0x1 +}; + +struct tpt_entry { + __be32 valid_stag_pdid; + __be32 flags_pagesize_qpid; + + __be32 rsvd_pbl_addr; + __be32 len; + __be32 va_hi; + __be32 va_low_or_fbo; + + __be32 rsvd_bind_cnt_or_pstag; + __be32 rsvd_pbl_size; +}; + +#define S_TPT_VALID 31 +#define V_TPT_VALID(x) ((x) << S_TPT_VALID) +#define F_TPT_VALID V_TPT_VALID(1U) + +#define S_TPT_STAG_KEY 23 +#define M_TPT_STAG_KEY 0xFF +#define V_TPT_STAG_KEY(x) ((x) << S_TPT_STAG_KEY) +#define G_TPT_STAG_KEY(x) (((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY) + +#define S_TPT_STAG_STATE 22 +#define V_TPT_STAG_STATE(x) ((x) << S_TPT_STAG_STATE) +#define F_TPT_STAG_STATE V_TPT_STAG_STATE(1U) + +#define S_TPT_STAG_TYPE 20 +#define M_TPT_STAG_TYPE 0x3 +#define V_TPT_STAG_TYPE(x) ((x) << S_TPT_STAG_TYPE) +#define G_TPT_STAG_TYPE(x) (((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE) + +#define S_TPT_PDID 0 +#define M_TPT_PDID 0xFFFFF +#define V_TPT_PDID(x) ((x) << S_TPT_PDID) +#define G_TPT_PDID(x) (((x) >> S_TPT_PDID) & M_TPT_PDID) + +#define S_TPT_PERM 28 +#define M_TPT_PERM 0xF +#define V_TPT_PERM(x) ((x) << S_TPT_PERM) +#define G_TPT_PERM(x) (((x) >> S_TPT_PERM) & M_TPT_PERM) + +#define S_TPT_REM_INV_DIS 27 +#define V_TPT_REM_INV_DIS(x) ((x) << S_TPT_REM_INV_DIS) +#define F_TPT_REM_INV_DIS V_TPT_REM_INV_DIS(1U) + +#define S_TPT_ADDR_TYPE 26 +#define V_TPT_ADDR_TYPE(x) ((x) << S_TPT_ADDR_TYPE) +#define F_TPT_ADDR_TYPE V_TPT_ADDR_TYPE(1U) + +#define S_TPT_MW_BIND_ENABLE 25 +#define V_TPT_MW_BIND_ENABLE(x) ((x) << S_TPT_MW_BIND_ENABLE) +#define F_TPT_MW_BIND_ENABLE V_TPT_MW_BIND_ENABLE(1U) + +#define S_TPT_PAGE_SIZE 20 +#define M_TPT_PAGE_SIZE 0x1F +#define V_TPT_PAGE_SIZE(x) ((x) << S_TPT_PAGE_SIZE) +#define G_TPT_PAGE_SIZE(x) (((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE) + +#define S_TPT_PBL_ADDR 0 +#define M_TPT_PBL_ADDR 0x1FFFFFFF +#define V_TPT_PBL_ADDR(x) ((x) << S_TPT_PBL_ADDR) +#define G_TPT_PBL_ADDR(x) (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR) + +#define S_TPT_QPID 0 +#define M_TPT_QPID 0xFFFFF +#define V_TPT_QPID(x) ((x) << S_TPT_QPID) +#define G_TPT_QPID(x) (((x) >> S_TPT_QPID) & M_TPT_QPID) + +#define S_TPT_PSTAG 0 +#define M_TPT_PSTAG 0xFFFFFF +#define V_TPT_PSTAG(x) ((x) << S_TPT_PSTAG) +#define G_TPT_PSTAG(x) (((x) >> S_TPT_PSTAG) & M_TPT_PSTAG) + +#define S_TPT_PBL_SIZE 0 +#define M_TPT_PBL_SIZE 0xFFFFF +#define V_TPT_PBL_SIZE(x) ((x) << S_TPT_PBL_SIZE) +#define G_TPT_PBL_SIZE(x) (((x) >> S_TPT_PBL_SIZE) & M_TPT_PBL_SIZE) + +/* + * CQE defs + */ +struct t3_cqe { + __be32 header; + __be32 len; + union { + struct { + __be32 stag; + __be32 msn; + } rcqe; + struct { + u32 wrid_hi; + u32 wrid_low; + } scqe; + } u; +}; + +#define S_CQE_OOO 31 +#define M_CQE_OOO 0x1 +#define G_CQE_OOO(x) ((((x) >> S_CQE_OOO)) & M_CQE_OOO) +#define V_CEQ_OOO(x) ((x)<> S_CQE_QPID)) & M_CQE_QPID) +#define V_CQE_QPID(x) ((x)<> S_CQE_SWCQE)) & M_CQE_SWCQE) +#define V_CQE_SWCQE(x) ((x)<> S_CQE_GENBIT) & M_CQE_GENBIT) +#define V_CQE_GENBIT(x) ((x)<> S_CQE_STATUS)) & M_CQE_STATUS) +#define V_CQE_STATUS(x) ((x)<> S_CQE_TYPE)) & M_CQE_TYPE) +#define V_CQE_TYPE(x) ((x)<> S_CQE_OPCODE)) & M_CQE_OPCODE) +#define V_CQE_OPCODE(x) ((x)<queue[1 << cq->size_log2])->cq_err; +} + +static inline void cxio_set_cq_in_error(struct t3_cq *cq) +{ + ((struct t3_cq_status_page *) + &cq->queue[1 << cq->size_log2])->cq_err = 1; +} + +static inline void cxio_set_wq_in_error(struct t3_wq *wq) +{ + wq->queue->wq_in_err.err |= 1; +} + +static inline void cxio_disable_wq_db(struct t3_wq *wq) +{ + wq->queue->wq_in_err.err |= 2; +} + +static inline void cxio_enable_wq_db(struct t3_wq *wq) +{ + wq->queue->wq_in_err.err &= ~2; +} + +static inline int cxio_wq_db_enabled(struct t3_wq *wq) +{ + return !(wq->queue->wq_in_err.err & 2); +} + +static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq) +{ + struct t3_cqe *cqe; + + cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2)); + if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe)) + return cqe; + return NULL; +} + +static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq) +{ + struct t3_cqe *cqe; + + if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) { + cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2)); + return cqe; + } + return NULL; +} + +static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq) +{ + struct t3_cqe *cqe; + + if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) { + cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2)); + return cqe; + } + cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2)); + if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe)) + return cqe; + return NULL; +} + +#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c new file mode 100644 index 00000000..8e77dc54 --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include + +#include + +#include "cxgb3_offload.h" +#include "iwch_provider.h" +#include "iwch_user.h" +#include "iwch.h" +#include "iwch_cm.h" + +#define DRV_VERSION "1.1" + +MODULE_AUTHOR("Boyd Faulkner, Steve Wise"); +MODULE_DESCRIPTION("Chelsio T3 RDMA Driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); + +static void open_rnic_dev(struct t3cdev *); +static void close_rnic_dev(struct t3cdev *); +static void iwch_event_handler(struct t3cdev *, u32, u32); + +struct cxgb3_client t3c_client = { + .name = "iw_cxgb3", + .add = open_rnic_dev, + .remove = close_rnic_dev, + .handlers = t3c_handlers, + .redirect = iwch_ep_redirect, + .event_handler = iwch_event_handler +}; + +static LIST_HEAD(dev_list); +static DEFINE_MUTEX(dev_mutex); + +static int disable_qp_db(int id, void *p, void *data) +{ + struct iwch_qp *qhp = p; + + cxio_disable_wq_db(&qhp->wq); + return 0; +} + +static int enable_qp_db(int id, void *p, void *data) +{ + struct iwch_qp *qhp = p; + + if (data) + ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, qhp->wq.qpid); + cxio_enable_wq_db(&qhp->wq); + return 0; +} + +static void disable_dbs(struct iwch_dev *rnicp) +{ + spin_lock_irq(&rnicp->lock); + idr_for_each(&rnicp->qpidr, disable_qp_db, NULL); + spin_unlock_irq(&rnicp->lock); +} + +static void enable_dbs(struct iwch_dev *rnicp, int ring_db) +{ + spin_lock_irq(&rnicp->lock); + idr_for_each(&rnicp->qpidr, enable_qp_db, + (void *)(unsigned long)ring_db); + spin_unlock_irq(&rnicp->lock); +} + +static void iwch_db_drop_task(struct work_struct *work) +{ + struct iwch_dev *rnicp = container_of(work, struct iwch_dev, + db_drop_task.work); + enable_dbs(rnicp, 1); +} + +static void rnic_init(struct iwch_dev *rnicp) +{ + PDBG("%s iwch_dev %p\n", __func__, rnicp); + idr_init(&rnicp->cqidr); + idr_init(&rnicp->qpidr); + idr_init(&rnicp->mmidr); + spin_lock_init(&rnicp->lock); + INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task); + + rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; + rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; + rnicp->attr.max_sge_per_wr = T3_MAX_SGE; + rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE; + rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1; + rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH; + rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev); + rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; + rnicp->attr.max_pds = T3_MAX_NUM_PD - 1; + rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK; + rnicp->attr.max_mr_size = T3_MAX_MR_SIZE; + rnicp->attr.can_resize_wq = 0; + rnicp->attr.max_rdma_reads_per_qp = 8; + rnicp->attr.max_rdma_read_resources = + rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps; + rnicp->attr.max_rdma_read_qp_depth = 8; /* IRD */ + rnicp->attr.max_rdma_read_depth = + rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps; + rnicp->attr.rq_overflow_handled = 0; + rnicp->attr.can_modify_ird = 0; + rnicp->attr.can_modify_ord = 0; + rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1; + rnicp->attr.stag0_value = 1; + rnicp->attr.zbva_support = 1; + rnicp->attr.local_invalidate_fence = 1; + rnicp->attr.cq_overflow_detection = 1; + return; +} + +static void open_rnic_dev(struct t3cdev *tdev) +{ + struct iwch_dev *rnicp; + + PDBG("%s t3cdev %p\n", __func__, tdev); + printk_once(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n", + DRV_VERSION); + rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp)); + if (!rnicp) { + printk(KERN_ERR MOD "Cannot allocate ib device\n"); + return; + } + rnicp->rdev.ulp = rnicp; + rnicp->rdev.t3cdev_p = tdev; + + mutex_lock(&dev_mutex); + + if (cxio_rdev_open(&rnicp->rdev)) { + mutex_unlock(&dev_mutex); + printk(KERN_ERR MOD "Unable to open CXIO rdev\n"); + ib_dealloc_device(&rnicp->ibdev); + return; + } + + rnic_init(rnicp); + + list_add_tail(&rnicp->entry, &dev_list); + mutex_unlock(&dev_mutex); + + if (iwch_register_device(rnicp)) { + printk(KERN_ERR MOD "Unable to register device\n"); + close_rnic_dev(tdev); + } + printk(KERN_INFO MOD "Initialized device %s\n", + pci_name(rnicp->rdev.rnic_info.pdev)); + return; +} + +static void close_rnic_dev(struct t3cdev *tdev) +{ + struct iwch_dev *dev, *tmp; + PDBG("%s t3cdev %p\n", __func__, tdev); + mutex_lock(&dev_mutex); + list_for_each_entry_safe(dev, tmp, &dev_list, entry) { + if (dev->rdev.t3cdev_p == tdev) { + dev->rdev.flags = CXIO_ERROR_FATAL; + synchronize_net(); + cancel_delayed_work_sync(&dev->db_drop_task); + list_del(&dev->entry); + iwch_unregister_device(dev); + cxio_rdev_close(&dev->rdev); + idr_destroy(&dev->cqidr); + idr_destroy(&dev->qpidr); + idr_destroy(&dev->mmidr); + ib_dealloc_device(&dev->ibdev); + break; + } + } + mutex_unlock(&dev_mutex); +} + +static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id) +{ + struct cxio_rdev *rdev = tdev->ulp; + struct iwch_dev *rnicp; + struct ib_event event; + u32 portnum = port_id + 1; + int dispatch = 0; + + if (!rdev) + return; + rnicp = rdev_to_iwch_dev(rdev); + switch (evt) { + case OFFLOAD_STATUS_DOWN: { + rdev->flags = CXIO_ERROR_FATAL; + synchronize_net(); + event.event = IB_EVENT_DEVICE_FATAL; + dispatch = 1; + break; + } + case OFFLOAD_PORT_DOWN: { + event.event = IB_EVENT_PORT_ERR; + dispatch = 1; + break; + } + case OFFLOAD_PORT_UP: { + event.event = IB_EVENT_PORT_ACTIVE; + dispatch = 1; + break; + } + case OFFLOAD_DB_FULL: { + disable_dbs(rnicp); + break; + } + case OFFLOAD_DB_EMPTY: { + enable_dbs(rnicp, 1); + break; + } + case OFFLOAD_DB_DROP: { + unsigned long delay = 1000; + unsigned short r; + + disable_dbs(rnicp); + get_random_bytes(&r, 2); + delay += r & 1023; + + /* + * delay is between 1000-2023 usecs. + */ + schedule_delayed_work(&rnicp->db_drop_task, + usecs_to_jiffies(delay)); + break; + } + } + + if (dispatch) { + event.device = &rnicp->ibdev; + event.element.port_num = portnum; + ib_dispatch_event(&event); + } + + return; +} + +static int __init iwch_init_module(void) +{ + int err; + + err = cxio_hal_init(); + if (err) + return err; + err = iwch_cm_init(); + if (err) + return err; + cxio_register_ev_cb(iwch_ev_dispatch); + cxgb3_register_client(&t3c_client); + return 0; +} + +static void __exit iwch_exit_module(void) +{ + cxgb3_unregister_client(&t3c_client); + cxio_unregister_ev_cb(iwch_ev_dispatch); + iwch_cm_term(); + cxio_hal_exit(); +} + +module_init(iwch_init_module); +module_exit(iwch_exit_module); diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h new file mode 100644 index 00000000..a1c44578 --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __IWCH_H__ +#define __IWCH_H__ + +#include +#include +#include +#include +#include + +#include + +#include "cxio_hal.h" +#include "cxgb3_offload.h" + +struct iwch_pd; +struct iwch_cq; +struct iwch_qp; +struct iwch_mr; + +struct iwch_rnic_attributes { + u32 max_qps; + u32 max_wrs; /* Max for any SQ/RQ */ + u32 max_sge_per_wr; + u32 max_sge_per_rdma_write_wr; /* for RDMA Write WR */ + u32 max_cqs; + u32 max_cqes_per_cq; + u32 max_mem_regs; + u32 max_phys_buf_entries; /* for phys buf list */ + u32 max_pds; + + /* + * The memory page sizes supported by this RNIC. + * Bit position i in bitmap indicates page of + * size (4k)^i. Phys block list mode unsupported. + */ + u32 mem_pgsizes_bitmask; + u64 max_mr_size; + u8 can_resize_wq; + + /* + * The maximum number of RDMA Reads that can be outstanding + * per QP with this RNIC as the target. + */ + u32 max_rdma_reads_per_qp; + + /* + * The maximum number of resources used for RDMA Reads + * by this RNIC with this RNIC as the target. + */ + u32 max_rdma_read_resources; + + /* + * The max depth per QP for initiation of RDMA Read + * by this RNIC. + */ + u32 max_rdma_read_qp_depth; + + /* + * The maximum depth for initiation of RDMA Read + * operations by this RNIC on all QPs + */ + u32 max_rdma_read_depth; + u8 rq_overflow_handled; + u32 can_modify_ird; + u32 can_modify_ord; + u32 max_mem_windows; + u32 stag0_value; + u8 zbva_support; + u8 local_invalidate_fence; + u32 cq_overflow_detection; +}; + +struct iwch_dev { + struct ib_device ibdev; + struct cxio_rdev rdev; + u32 device_cap_flags; + struct iwch_rnic_attributes attr; + struct idr cqidr; + struct idr qpidr; + struct idr mmidr; + spinlock_t lock; + struct list_head entry; + struct delayed_work db_drop_task; +}; + +static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct iwch_dev, ibdev); +} + +static inline struct iwch_dev *rdev_to_iwch_dev(struct cxio_rdev *rdev) +{ + return container_of(rdev, struct iwch_dev, rdev); +} + +static inline int t3b_device(const struct iwch_dev *rhp) +{ + return rhp->rdev.t3cdev_p->type == T3B; +} + +static inline int t3a_device(const struct iwch_dev *rhp) +{ + return rhp->rdev.t3cdev_p->type == T3A; +} + +static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid) +{ + return idr_find(&rhp->cqidr, cqid); +} + +static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid) +{ + return idr_find(&rhp->qpidr, qpid); +} + +static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid) +{ + return idr_find(&rhp->mmidr, mmid); +} + +static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr, + void *handle, u32 id) +{ + int ret; + int newid; + + do { + if (!idr_pre_get(idr, GFP_KERNEL)) { + return -ENOMEM; + } + spin_lock_irq(&rhp->lock); + ret = idr_get_new_above(idr, handle, id, &newid); + BUG_ON(newid != id); + spin_unlock_irq(&rhp->lock); + } while (ret == -EAGAIN); + + return ret; +} + +static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id) +{ + spin_lock_irq(&rhp->lock); + idr_remove(idr, id); + spin_unlock_irq(&rhp->lock); +} + +extern struct cxgb3_client t3c_client; +extern cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS]; +extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb); + +#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c new file mode 100644 index 00000000..740dcc06 --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -0,0 +1,2254 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "tcb.h" +#include "cxgb3_offload.h" +#include "iwch.h" +#include "iwch_provider.h" +#include "iwch_cm.h" + +static char *states[] = { + "idle", + "listen", + "connecting", + "mpa_wait_req", + "mpa_req_sent", + "mpa_req_rcvd", + "mpa_rep_sent", + "fpdu_mode", + "aborting", + "closing", + "moribund", + "dead", + NULL, +}; + +int peer2peer = 0; +module_param(peer2peer, int, 0644); +MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); + +static int ep_timeout_secs = 60; +module_param(ep_timeout_secs, int, 0644); +MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " + "in seconds (default=60)"); + +static int mpa_rev = 1; +module_param(mpa_rev, int, 0644); +MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " + "1 is spec compliant. (default=1)"); + +static int markers_enabled = 0; +module_param(markers_enabled, int, 0644); +MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)"); + +static int crc_enabled = 1; +module_param(crc_enabled, int, 0644); +MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)"); + +static int rcv_win = 256 * 1024; +module_param(rcv_win, int, 0644); +MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256)"); + +static int snd_win = 32 * 1024; +module_param(snd_win, int, 0644); +MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)"); + +static unsigned int nocong = 0; +module_param(nocong, uint, 0644); +MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)"); + +static unsigned int cong_flavor = 1; +module_param(cong_flavor, uint, 0644); +MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)"); + +static struct workqueue_struct *workq; + +static struct sk_buff_head rxq; + +static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); +static void ep_timeout(unsigned long arg); +static void connect_reply_upcall(struct iwch_ep *ep, int status); + +static void start_ep_timer(struct iwch_ep *ep) +{ + PDBG("%s ep %p\n", __func__, ep); + if (timer_pending(&ep->timer)) { + PDBG("%s stopped / restarted timer ep %p\n", __func__, ep); + del_timer_sync(&ep->timer); + } else + get_ep(&ep->com); + ep->timer.expires = jiffies + ep_timeout_secs * HZ; + ep->timer.data = (unsigned long)ep; + ep->timer.function = ep_timeout; + add_timer(&ep->timer); +} + +static void stop_ep_timer(struct iwch_ep *ep) +{ + PDBG("%s ep %p\n", __func__, ep); + if (!timer_pending(&ep->timer)) { + printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n", + __func__, ep, ep->com.state); + WARN_ON(1); + return; + } + del_timer_sync(&ep->timer); + put_ep(&ep->com); +} + +static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e) +{ + int error = 0; + struct cxio_rdev *rdev; + + rdev = (struct cxio_rdev *)tdev->ulp; + if (cxio_fatal_error(rdev)) { + kfree_skb(skb); + return -EIO; + } + error = l2t_send(tdev, skb, l2e); + if (error < 0) + kfree_skb(skb); + return error; +} + +int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) +{ + int error = 0; + struct cxio_rdev *rdev; + + rdev = (struct cxio_rdev *)tdev->ulp; + if (cxio_fatal_error(rdev)) { + kfree_skb(skb); + return -EIO; + } + error = cxgb3_ofld_send(tdev, skb); + if (error < 0) + kfree_skb(skb); + return error; +} + +static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) +{ + struct cpl_tid_release *req; + + skb = get_skb(skb, sizeof *req, GFP_KERNEL); + if (!skb) + return; + req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); + skb->priority = CPL_PRIORITY_SETUP; + iwch_cxgb3_ofld_send(tdev, skb); + return; +} + +int iwch_quiesce_tid(struct iwch_ep *ep) +{ + struct cpl_set_tcb_field *req; + struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + + if (!skb) + return -ENOMEM; + req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); + req->reply = 0; + req->cpu_idx = 0; + req->word = htons(W_TCB_RX_QUIESCE); + req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); + req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE); + + skb->priority = CPL_PRIORITY_DATA; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); +} + +int iwch_resume_tid(struct iwch_ep *ep) +{ + struct cpl_set_tcb_field *req; + struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + + if (!skb) + return -ENOMEM; + req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); + req->reply = 0; + req->cpu_idx = 0; + req->word = htons(W_TCB_RX_QUIESCE); + req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); + req->val = 0; + + skb->priority = CPL_PRIORITY_DATA; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); +} + +static void set_emss(struct iwch_ep *ep, u16 opt) +{ + PDBG("%s ep %p opt %u\n", __func__, ep, opt); + ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40; + if (G_TCPOPT_TSTAMP(opt)) + ep->emss -= 12; + if (ep->emss < 128) + ep->emss = 128; + PDBG("emss=%d\n", ep->emss); +} + +static enum iwch_ep_state state_read(struct iwch_ep_common *epc) +{ + unsigned long flags; + enum iwch_ep_state state; + + spin_lock_irqsave(&epc->lock, flags); + state = epc->state; + spin_unlock_irqrestore(&epc->lock, flags); + return state; +} + +static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) +{ + epc->state = new; +} + +static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) +{ + unsigned long flags; + + spin_lock_irqsave(&epc->lock, flags); + PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]); + __state_set(epc, new); + spin_unlock_irqrestore(&epc->lock, flags); + return; +} + +static void *alloc_ep(int size, gfp_t gfp) +{ + struct iwch_ep_common *epc; + + epc = kzalloc(size, gfp); + if (epc) { + kref_init(&epc->kref); + spin_lock_init(&epc->lock); + init_waitqueue_head(&epc->waitq); + } + PDBG("%s alloc ep %p\n", __func__, epc); + return epc; +} + +void __free_ep(struct kref *kref) +{ + struct iwch_ep *ep; + ep = container_of(container_of(kref, struct iwch_ep_common, kref), + struct iwch_ep, com); + PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]); + if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { + cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); + dst_release(ep->dst); + l2t_release(ep->com.tdev, ep->l2t); + } + kfree(ep); +} + +static void release_ep_resources(struct iwch_ep *ep) +{ + PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid); + set_bit(RELEASE_RESOURCES, &ep->com.flags); + put_ep(&ep->com); +} + +static int status2errno(int status) +{ + switch (status) { + case CPL_ERR_NONE: + return 0; + case CPL_ERR_CONN_RESET: + return -ECONNRESET; + case CPL_ERR_ARP_MISS: + return -EHOSTUNREACH; + case CPL_ERR_CONN_TIMEDOUT: + return -ETIMEDOUT; + case CPL_ERR_TCAM_FULL: + return -ENOMEM; + case CPL_ERR_CONN_EXIST: + return -EADDRINUSE; + default: + return -EIO; + } +} + +/* + * Try and reuse skbs already allocated... + */ +static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) +{ + if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) { + skb_trim(skb, 0); + skb_get(skb); + } else { + skb = alloc_skb(len, gfp); + } + return skb; +} + +static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip, + __be32 peer_ip, __be16 local_port, + __be16 peer_port, u8 tos) +{ + struct rtable *rt; + struct flowi4 fl4; + + rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, + peer_port, local_port, IPPROTO_TCP, + tos, 0); + if (IS_ERR(rt)) + return NULL; + return rt; +} + +static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu) +{ + int i = 0; + + while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) + ++i; + return i; +} + +static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb) +{ + PDBG("%s t3cdev %p\n", __func__, dev); + kfree_skb(skb); +} + +/* + * Handle an ARP failure for an active open. + */ +static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb) +{ + printk(KERN_ERR MOD "ARP failure duing connect\n"); + kfree_skb(skb); +} + +/* + * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant + * and send it along. + */ +static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb) +{ + struct cpl_abort_req *req = cplhdr(skb); + + PDBG("%s t3cdev %p\n", __func__, dev); + req->cmd = CPL_ABORT_NO_RST; + iwch_cxgb3_ofld_send(dev, skb); +} + +static int send_halfclose(struct iwch_ep *ep, gfp_t gfp) +{ + struct cpl_close_con_req *req; + struct sk_buff *skb; + + PDBG("%s ep %p\n", __func__, ep); + skb = get_skb(NULL, sizeof(*req), gfp); + if (!skb) { + printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__); + return -ENOMEM; + } + skb->priority = CPL_PRIORITY_DATA; + set_arp_failure_handler(skb, arp_failure_discard); + req = (struct cpl_close_con_req *) skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); + req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid)); + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); +} + +static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) +{ + struct cpl_abort_req *req; + + PDBG("%s ep %p\n", __func__, ep); + skb = get_skb(skb, sizeof(*req), gfp); + if (!skb) { + printk(KERN_ERR MOD "%s - failed to alloc skb.\n", + __func__); + return -ENOMEM; + } + skb->priority = CPL_PRIORITY_DATA; + set_arp_failure_handler(skb, abort_arp_failure); + req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); + req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); + req->cmd = CPL_ABORT_SEND_RST; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); +} + +static int send_connect(struct iwch_ep *ep) +{ + struct cpl_act_open_req *req; + struct sk_buff *skb; + u32 opt0h, opt0l, opt2; + unsigned int mtu_idx; + int wscale; + + PDBG("%s ep %p\n", __func__, ep); + + skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + if (!skb) { + printk(KERN_ERR MOD "%s - failed to alloc skb.\n", + __func__); + return -ENOMEM; + } + mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst)); + wscale = compute_wscale(rcv_win); + opt0h = V_NAGLE(0) | + V_NO_CONG(nocong) | + V_KEEP_ALIVE(1) | + F_TCAM_BYPASS | + V_WND_SCALE(wscale) | + V_MSS_IDX(mtu_idx) | + V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); + opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); + opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | + V_CONG_CONTROL_FLAVOR(cong_flavor); + skb->priority = CPL_PRIORITY_SETUP; + set_arp_failure_handler(skb, act_open_req_arp_failure); + + req = (struct cpl_act_open_req *) skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid)); + req->local_port = ep->com.local_addr.sin_port; + req->peer_port = ep->com.remote_addr.sin_port; + req->local_ip = ep->com.local_addr.sin_addr.s_addr; + req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; + req->opt0h = htonl(opt0h); + req->opt0l = htonl(opt0l); + req->params = 0; + req->opt2 = htonl(opt2); + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); +} + +static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) +{ + int mpalen; + struct tx_data_wr *req; + struct mpa_message *mpa; + int len; + + PDBG("%s ep %p pd_len %d\n", __func__, ep, ep->plen); + + BUG_ON(skb_cloned(skb)); + + mpalen = sizeof(*mpa) + ep->plen; + if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) { + kfree_skb(skb); + skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL); + if (!skb) { + connect_reply_upcall(ep, -ENOMEM); + return; + } + } + skb_trim(skb, 0); + skb_reserve(skb, sizeof(*req)); + skb_put(skb, mpalen); + skb->priority = CPL_PRIORITY_DATA; + mpa = (struct mpa_message *) skb->data; + memset(mpa, 0, sizeof(*mpa)); + memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); + mpa->flags = (crc_enabled ? MPA_CRC : 0) | + (markers_enabled ? MPA_MARKERS : 0); + mpa->private_data_size = htons(ep->plen); + mpa->revision = mpa_rev; + + if (ep->plen) + memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen); + + /* + * Reference the mpa skb. This ensures the data area + * will remain in memory until the hw acks the tx. + * Function tx_ack() will deref it. + */ + skb_get(skb); + set_arp_failure_handler(skb, arp_failure_discard); + skb_reset_transport_header(skb); + len = skb->len; + req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); + req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); + req->wr_lo = htonl(V_WR_TID(ep->hwtid)); + req->len = htonl(len); + req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | + V_TX_SNDBUF(snd_win>>15)); + req->flags = htonl(F_TX_INIT); + req->sndseq = htonl(ep->snd_seq); + BUG_ON(ep->mpa_skb); + ep->mpa_skb = skb; + iwch_l2t_send(ep->com.tdev, skb, ep->l2t); + start_ep_timer(ep); + state_set(&ep->com, MPA_REQ_SENT); + return; +} + +static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) +{ + int mpalen; + struct tx_data_wr *req; + struct mpa_message *mpa; + struct sk_buff *skb; + + PDBG("%s ep %p plen %d\n", __func__, ep, plen); + + mpalen = sizeof(*mpa) + plen; + + skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL); + if (!skb) { + printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__); + return -ENOMEM; + } + skb_reserve(skb, sizeof(*req)); + mpa = (struct mpa_message *) skb_put(skb, mpalen); + memset(mpa, 0, sizeof(*mpa)); + memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); + mpa->flags = MPA_REJECT; + mpa->revision = mpa_rev; + mpa->private_data_size = htons(plen); + if (plen) + memcpy(mpa->private_data, pdata, plen); + + /* + * Reference the mpa skb again. This ensures the data area + * will remain in memory until the hw acks the tx. + * Function tx_ack() will deref it. + */ + skb_get(skb); + skb->priority = CPL_PRIORITY_DATA; + set_arp_failure_handler(skb, arp_failure_discard); + skb_reset_transport_header(skb); + req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); + req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); + req->wr_lo = htonl(V_WR_TID(ep->hwtid)); + req->len = htonl(mpalen); + req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | + V_TX_SNDBUF(snd_win>>15)); + req->flags = htonl(F_TX_INIT); + req->sndseq = htonl(ep->snd_seq); + BUG_ON(ep->mpa_skb); + ep->mpa_skb = skb; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); +} + +static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) +{ + int mpalen; + struct tx_data_wr *req; + struct mpa_message *mpa; + int len; + struct sk_buff *skb; + + PDBG("%s ep %p plen %d\n", __func__, ep, plen); + + mpalen = sizeof(*mpa) + plen; + + skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL); + if (!skb) { + printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__); + return -ENOMEM; + } + skb->priority = CPL_PRIORITY_DATA; + skb_reserve(skb, sizeof(*req)); + mpa = (struct mpa_message *) skb_put(skb, mpalen); + memset(mpa, 0, sizeof(*mpa)); + memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); + mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | + (markers_enabled ? MPA_MARKERS : 0); + mpa->revision = mpa_rev; + mpa->private_data_size = htons(plen); + if (plen) + memcpy(mpa->private_data, pdata, plen); + + /* + * Reference the mpa skb. This ensures the data area + * will remain in memory until the hw acks the tx. + * Function tx_ack() will deref it. + */ + skb_get(skb); + set_arp_failure_handler(skb, arp_failure_discard); + skb_reset_transport_header(skb); + len = skb->len; + req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); + req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); + req->wr_lo = htonl(V_WR_TID(ep->hwtid)); + req->len = htonl(len); + req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | + V_TX_SNDBUF(snd_win>>15)); + req->flags = htonl(F_TX_INIT); + req->sndseq = htonl(ep->snd_seq); + ep->mpa_skb = skb; + state_set(&ep->com, MPA_REP_SENT); + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); +} + +static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct iwch_ep *ep = ctx; + struct cpl_act_establish *req = cplhdr(skb); + unsigned int tid = GET_TID(req); + + PDBG("%s ep %p tid %d\n", __func__, ep, tid); + + dst_confirm(ep->dst); + + /* setup the hwtid for this connection */ + ep->hwtid = tid; + cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid); + + ep->snd_seq = ntohl(req->snd_isn); + ep->rcv_seq = ntohl(req->rcv_isn); + + set_emss(ep, ntohs(req->tcp_opt)); + + /* dealloc the atid */ + cxgb3_free_atid(ep->com.tdev, ep->atid); + + /* start MPA negotiation */ + send_mpa_req(ep, skb); + + return 0; +} + +static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) +{ + PDBG("%s ep %p\n", __FILE__, ep); + state_set(&ep->com, ABORTING); + send_abort(ep, skb, gfp); +} + +static void close_complete_upcall(struct iwch_ep *ep) +{ + struct iw_cm_event event; + + PDBG("%s ep %p\n", __func__, ep); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CLOSE; + if (ep->com.cm_id) { + PDBG("close complete delivered ep %p cm_id %p tid %d\n", + ep, ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + ep->com.qp = NULL; + } +} + +static void peer_close_upcall(struct iwch_ep *ep) +{ + struct iw_cm_event event; + + PDBG("%s ep %p\n", __func__, ep); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_DISCONNECT; + if (ep->com.cm_id) { + PDBG("peer close delivered ep %p cm_id %p tid %d\n", + ep, ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + } +} + +static void peer_abort_upcall(struct iwch_ep *ep) +{ + struct iw_cm_event event; + + PDBG("%s ep %p\n", __func__, ep); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CLOSE; + event.status = -ECONNRESET; + if (ep->com.cm_id) { + PDBG("abort delivered ep %p cm_id %p tid %d\n", ep, + ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + ep->com.qp = NULL; + } +} + +static void connect_reply_upcall(struct iwch_ep *ep, int status) +{ + struct iw_cm_event event; + + PDBG("%s ep %p status %d\n", __func__, ep, status); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CONNECT_REPLY; + event.status = status; + event.local_addr = ep->com.local_addr; + event.remote_addr = ep->com.remote_addr; + + if ((status == 0) || (status == -ECONNREFUSED)) { + event.private_data_len = ep->plen; + event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); + } + if (ep->com.cm_id) { + PDBG("%s ep %p tid %d status %d\n", __func__, ep, + ep->hwtid, status); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + } + if (status < 0) { + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + ep->com.qp = NULL; + } +} + +static void connect_request_upcall(struct iwch_ep *ep) +{ + struct iw_cm_event event; + + PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CONNECT_REQUEST; + event.local_addr = ep->com.local_addr; + event.remote_addr = ep->com.remote_addr; + event.private_data_len = ep->plen; + event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); + event.provider_data = ep; + /* + * Until ird/ord negotiation via MPAv2 support is added, send max + * supported values + */ + event.ird = event.ord = 8; + if (state_read(&ep->parent_ep->com) != DEAD) { + get_ep(&ep->com); + ep->parent_ep->com.cm_id->event_handler( + ep->parent_ep->com.cm_id, + &event); + } + put_ep(&ep->parent_ep->com); + ep->parent_ep = NULL; +} + +static void established_upcall(struct iwch_ep *ep) +{ + struct iw_cm_event event; + + PDBG("%s ep %p\n", __func__, ep); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_ESTABLISHED; + /* + * Until ird/ord negotiation via MPAv2 support is added, send max + * supported values + */ + event.ird = event.ord = 8; + if (ep->com.cm_id) { + PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + } +} + +static int update_rx_credits(struct iwch_ep *ep, u32 credits) +{ + struct cpl_rx_data_ack *req; + struct sk_buff *skb; + + PDBG("%s ep %p credits %u\n", __func__, ep, credits); + skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + if (!skb) { + printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n"); + return 0; + } + + req = (struct cpl_rx_data_ack *) skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid)); + req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1)); + skb->priority = CPL_PRIORITY_ACK; + iwch_cxgb3_ofld_send(ep->com.tdev, skb); + return credits; +} + +static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) +{ + struct mpa_message *mpa; + u16 plen; + struct iwch_qp_attributes attrs; + enum iwch_qp_attr_mask mask; + int err; + + PDBG("%s ep %p\n", __func__, ep); + + /* + * Stop mpa timer. If it expired, then the state has + * changed and we bail since ep_timeout already aborted + * the connection. + */ + stop_ep_timer(ep); + if (state_read(&ep->com) != MPA_REQ_SENT) + return; + + /* + * If we get more than the supported amount of private data + * then we must fail this connection. + */ + if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { + err = -EINVAL; + goto err; + } + + /* + * copy the new data into our accumulation buffer. + */ + skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), + skb->len); + ep->mpa_pkt_len += skb->len; + + /* + * if we don't even have the mpa message, then bail. + */ + if (ep->mpa_pkt_len < sizeof(*mpa)) + return; + mpa = (struct mpa_message *) ep->mpa_pkt; + + /* Validate MPA header. */ + if (mpa->revision != mpa_rev) { + err = -EPROTO; + goto err; + } + if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { + err = -EPROTO; + goto err; + } + + plen = ntohs(mpa->private_data_size); + + /* + * Fail if there's too much private data. + */ + if (plen > MPA_MAX_PRIVATE_DATA) { + err = -EPROTO; + goto err; + } + + /* + * If plen does not account for pkt size + */ + if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { + err = -EPROTO; + goto err; + } + + ep->plen = (u8) plen; + + /* + * If we don't have all the pdata yet, then bail. + * We'll continue process when more data arrives. + */ + if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) + return; + + if (mpa->flags & MPA_REJECT) { + err = -ECONNREFUSED; + goto err; + } + + /* + * If we get here we have accumulated the entire mpa + * start reply message including private data. And + * the MPA header is valid. + */ + state_set(&ep->com, FPDU_MODE); + ep->mpa_attr.initiator = 1; + ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; + ep->mpa_attr.recv_marker_enabled = markers_enabled; + ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; + ep->mpa_attr.version = mpa_rev; + PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, " + "xmit_marker_enabled=%d, version=%d\n", __func__, + ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, + ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); + + attrs.mpa_attr = ep->mpa_attr; + attrs.max_ird = ep->ird; + attrs.max_ord = ep->ord; + attrs.llp_stream_handle = ep; + attrs.next_state = IWCH_QP_STATE_RTS; + + mask = IWCH_QP_ATTR_NEXT_STATE | + IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR | + IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD; + + /* bind QP and TID with INIT_WR */ + err = iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, mask, &attrs, 1); + if (err) + goto err; + + if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) { + iwch_post_zb_read(ep); + } + + goto out; +err: + abort_connection(ep, skb, GFP_KERNEL); +out: + connect_reply_upcall(ep, err); + return; +} + +static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb) +{ + struct mpa_message *mpa; + u16 plen; + + PDBG("%s ep %p\n", __func__, ep); + + /* + * Stop mpa timer. If it expired, then the state has + * changed and we bail since ep_timeout already aborted + * the connection. + */ + stop_ep_timer(ep); + if (state_read(&ep->com) != MPA_REQ_WAIT) + return; + + /* + * If we get more than the supported amount of private data + * then we must fail this connection. + */ + if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { + abort_connection(ep, skb, GFP_KERNEL); + return; + } + + PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); + + /* + * Copy the new data into our accumulation buffer. + */ + skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), + skb->len); + ep->mpa_pkt_len += skb->len; + + /* + * If we don't even have the mpa message, then bail. + * We'll continue process when more data arrives. + */ + if (ep->mpa_pkt_len < sizeof(*mpa)) + return; + PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); + mpa = (struct mpa_message *) ep->mpa_pkt; + + /* + * Validate MPA Header. + */ + if (mpa->revision != mpa_rev) { + abort_connection(ep, skb, GFP_KERNEL); + return; + } + + if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { + abort_connection(ep, skb, GFP_KERNEL); + return; + } + + plen = ntohs(mpa->private_data_size); + + /* + * Fail if there's too much private data. + */ + if (plen > MPA_MAX_PRIVATE_DATA) { + abort_connection(ep, skb, GFP_KERNEL); + return; + } + + /* + * If plen does not account for pkt size + */ + if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { + abort_connection(ep, skb, GFP_KERNEL); + return; + } + ep->plen = (u8) plen; + + /* + * If we don't have all the pdata yet, then bail. + */ + if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) + return; + + /* + * If we get here we have accumulated the entire mpa + * start reply message including private data. + */ + ep->mpa_attr.initiator = 0; + ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; + ep->mpa_attr.recv_marker_enabled = markers_enabled; + ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; + ep->mpa_attr.version = mpa_rev; + PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, " + "xmit_marker_enabled=%d, version=%d\n", __func__, + ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, + ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); + + state_set(&ep->com, MPA_REQ_RCVD); + + /* drive upcall */ + connect_request_upcall(ep); + return; +} + +static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct iwch_ep *ep = ctx; + struct cpl_rx_data *hdr = cplhdr(skb); + unsigned int dlen = ntohs(hdr->len); + + PDBG("%s ep %p dlen %u\n", __func__, ep, dlen); + + skb_pull(skb, sizeof(*hdr)); + skb_trim(skb, dlen); + + ep->rcv_seq += dlen; + BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen)); + + switch (state_read(&ep->com)) { + case MPA_REQ_SENT: + process_mpa_reply(ep, skb); + break; + case MPA_REQ_WAIT: + process_mpa_request(ep, skb); + break; + case MPA_REP_SENT: + break; + default: + printk(KERN_ERR MOD "%s Unexpected streaming data." + " ep %p state %d tid %d\n", + __func__, ep, state_read(&ep->com), ep->hwtid); + + /* + * The ep will timeout and inform the ULP of the failure. + * See ep_timeout(). + */ + break; + } + + /* update RX credits */ + update_rx_credits(ep, dlen); + + return CPL_RET_BUF_DONE; +} + +/* + * Upcall from the adapter indicating data has been transmitted. + * For us its just the single MPA request or reply. We can now free + * the skb holding the mpa message. + */ +static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct iwch_ep *ep = ctx; + struct cpl_wr_ack *hdr = cplhdr(skb); + unsigned int credits = ntohs(hdr->credits); + unsigned long flags; + int post_zb = 0; + + PDBG("%s ep %p credits %u\n", __func__, ep, credits); + + if (credits == 0) { + PDBG("%s 0 credit ack ep %p state %u\n", + __func__, ep, state_read(&ep->com)); + return CPL_RET_BUF_DONE; + } + + spin_lock_irqsave(&ep->com.lock, flags); + BUG_ON(credits != 1); + dst_confirm(ep->dst); + if (!ep->mpa_skb) { + PDBG("%s rdma_init wr_ack ep %p state %u\n", + __func__, ep, ep->com.state); + if (ep->mpa_attr.initiator) { + PDBG("%s initiator ep %p state %u\n", + __func__, ep, ep->com.state); + if (peer2peer && ep->com.state == FPDU_MODE) + post_zb = 1; + } else { + PDBG("%s responder ep %p state %u\n", + __func__, ep, ep->com.state); + if (ep->com.state == MPA_REQ_RCVD) { + ep->com.rpl_done = 1; + wake_up(&ep->com.waitq); + } + } + } else { + PDBG("%s lsm ack ep %p state %u freeing skb\n", + __func__, ep, ep->com.state); + kfree_skb(ep->mpa_skb); + ep->mpa_skb = NULL; + } + spin_unlock_irqrestore(&ep->com.lock, flags); + if (post_zb) + iwch_post_zb_read(ep); + return CPL_RET_BUF_DONE; +} + +static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct iwch_ep *ep = ctx; + unsigned long flags; + int release = 0; + + PDBG("%s ep %p\n", __func__, ep); + BUG_ON(!ep); + + /* + * We get 2 abort replies from the HW. The first one must + * be ignored except for scribbling that we need one more. + */ + if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) { + return CPL_RET_BUF_DONE; + } + + spin_lock_irqsave(&ep->com.lock, flags); + switch (ep->com.state) { + case ABORTING: + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + break; + default: + printk(KERN_ERR "%s ep %p state %d\n", + __func__, ep, ep->com.state); + break; + } + spin_unlock_irqrestore(&ep->com.lock, flags); + + if (release) + release_ep_resources(ep); + return CPL_RET_BUF_DONE; +} + +/* + * Return whether a failed active open has allocated a TID + */ +static inline int act_open_has_tid(int status) +{ + return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && + status != CPL_ERR_ARP_MISS; +} + +static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct iwch_ep *ep = ctx; + struct cpl_act_open_rpl *rpl = cplhdr(skb); + + PDBG("%s ep %p status %u errno %d\n", __func__, ep, rpl->status, + status2errno(rpl->status)); + connect_reply_upcall(ep, status2errno(rpl->status)); + state_set(&ep->com, DEAD); + if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status)) + release_tid(ep->com.tdev, GET_TID(rpl), NULL); + cxgb3_free_atid(ep->com.tdev, ep->atid); + dst_release(ep->dst); + l2t_release(ep->com.tdev, ep->l2t); + put_ep(&ep->com); + return CPL_RET_BUF_DONE; +} + +static int listen_start(struct iwch_listen_ep *ep) +{ + struct sk_buff *skb; + struct cpl_pass_open_req *req; + + PDBG("%s ep %p\n", __func__, ep); + skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + if (!skb) { + printk(KERN_ERR MOD "t3c_listen_start failed to alloc skb!\n"); + return -ENOMEM; + } + + req = (struct cpl_pass_open_req *) skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid)); + req->local_port = ep->com.local_addr.sin_port; + req->local_ip = ep->com.local_addr.sin_addr.s_addr; + req->peer_port = 0; + req->peer_ip = 0; + req->peer_netmask = 0; + req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS); + req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10)); + req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK)); + + skb->priority = 1; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); +} + +static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct iwch_listen_ep *ep = ctx; + struct cpl_pass_open_rpl *rpl = cplhdr(skb); + + PDBG("%s ep %p status %d error %d\n", __func__, ep, + rpl->status, status2errno(rpl->status)); + ep->com.rpl_err = status2errno(rpl->status); + ep->com.rpl_done = 1; + wake_up(&ep->com.waitq); + + return CPL_RET_BUF_DONE; +} + +static int listen_stop(struct iwch_listen_ep *ep) +{ + struct sk_buff *skb; + struct cpl_close_listserv_req *req; + + PDBG("%s ep %p\n", __func__, ep); + skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + if (!skb) { + printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__); + return -ENOMEM; + } + req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + req->cpu_idx = 0; + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid)); + skb->priority = 1; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); +} + +static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb, + void *ctx) +{ + struct iwch_listen_ep *ep = ctx; + struct cpl_close_listserv_rpl *rpl = cplhdr(skb); + + PDBG("%s ep %p\n", __func__, ep); + ep->com.rpl_err = status2errno(rpl->status); + ep->com.rpl_done = 1; + wake_up(&ep->com.waitq); + return CPL_RET_BUF_DONE; +} + +static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb) +{ + struct cpl_pass_accept_rpl *rpl; + unsigned int mtu_idx; + u32 opt0h, opt0l, opt2; + int wscale; + + PDBG("%s ep %p\n", __func__, ep); + BUG_ON(skb_cloned(skb)); + skb_trim(skb, sizeof(*rpl)); + skb_get(skb); + mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst)); + wscale = compute_wscale(rcv_win); + opt0h = V_NAGLE(0) | + V_NO_CONG(nocong) | + V_KEEP_ALIVE(1) | + F_TCAM_BYPASS | + V_WND_SCALE(wscale) | + V_MSS_IDX(mtu_idx) | + V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); + opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); + opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | + V_CONG_CONTROL_FLAVOR(cong_flavor); + + rpl = cplhdr(skb); + rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid)); + rpl->peer_ip = peer_ip; + rpl->opt0h = htonl(opt0h); + rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT); + rpl->opt2 = htonl(opt2); + rpl->rsvd = rpl->opt2; /* workaround for HW bug */ + skb->priority = CPL_PRIORITY_SETUP; + iwch_l2t_send(ep->com.tdev, skb, ep->l2t); + + return; +} + +static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip, + struct sk_buff *skb) +{ + PDBG("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid, + peer_ip); + BUG_ON(skb_cloned(skb)); + skb_trim(skb, sizeof(struct cpl_tid_release)); + skb_get(skb); + + if (tdev->type != T3A) + release_tid(tdev, hwtid, skb); + else { + struct cpl_pass_accept_rpl *rpl; + + rpl = cplhdr(skb); + skb->priority = CPL_PRIORITY_SETUP; + rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, + hwtid)); + rpl->peer_ip = peer_ip; + rpl->opt0h = htonl(F_TCAM_BYPASS); + rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); + rpl->opt2 = 0; + rpl->rsvd = rpl->opt2; + iwch_cxgb3_ofld_send(tdev, skb); + } +} + +static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct iwch_ep *child_ep, *parent_ep = ctx; + struct cpl_pass_accept_req *req = cplhdr(skb); + unsigned int hwtid = GET_TID(req); + struct dst_entry *dst; + struct l2t_entry *l2t; + struct rtable *rt; + struct iff_mac tim; + + PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid); + + if (state_read(&parent_ep->com) != LISTEN) { + printk(KERN_ERR "%s - listening ep not in LISTEN\n", + __func__); + goto reject; + } + + /* + * Find the netdev for this connection request. + */ + tim.mac_addr = req->dst_mac; + tim.vlan_tag = ntohs(req->vlan_tag); + if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) { + printk(KERN_ERR "%s bad dst mac %pM\n", + __func__, req->dst_mac); + goto reject; + } + + /* Find output route */ + rt = find_route(tdev, + req->local_ip, + req->peer_ip, + req->local_port, + req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid))); + if (!rt) { + printk(KERN_ERR MOD "%s - failed to find dst entry!\n", + __func__); + goto reject; + } + dst = &rt->dst; + l2t = t3_l2t_get(tdev, dst, NULL); + if (!l2t) { + printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", + __func__); + dst_release(dst); + goto reject; + } + child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); + if (!child_ep) { + printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n", + __func__); + l2t_release(tdev, l2t); + dst_release(dst); + goto reject; + } + state_set(&child_ep->com, CONNECTING); + child_ep->com.tdev = tdev; + child_ep->com.cm_id = NULL; + child_ep->com.local_addr.sin_family = PF_INET; + child_ep->com.local_addr.sin_port = req->local_port; + child_ep->com.local_addr.sin_addr.s_addr = req->local_ip; + child_ep->com.remote_addr.sin_family = PF_INET; + child_ep->com.remote_addr.sin_port = req->peer_port; + child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip; + get_ep(&parent_ep->com); + child_ep->parent_ep = parent_ep; + child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid)); + child_ep->l2t = l2t; + child_ep->dst = dst; + child_ep->hwtid = hwtid; + init_timer(&child_ep->timer); + cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid); + accept_cr(child_ep, req->peer_ip, skb); + goto out; +reject: + reject_cr(tdev, hwtid, req->peer_ip, skb); +out: + return CPL_RET_BUF_DONE; +} + +static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct iwch_ep *ep = ctx; + struct cpl_pass_establish *req = cplhdr(skb); + + PDBG("%s ep %p\n", __func__, ep); + ep->snd_seq = ntohl(req->snd_isn); + ep->rcv_seq = ntohl(req->rcv_isn); + + set_emss(ep, ntohs(req->tcp_opt)); + + dst_confirm(ep->dst); + state_set(&ep->com, MPA_REQ_WAIT); + start_ep_timer(ep); + + return CPL_RET_BUF_DONE; +} + +static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct iwch_ep *ep = ctx; + struct iwch_qp_attributes attrs; + unsigned long flags; + int disconnect = 1; + int release = 0; + + PDBG("%s ep %p\n", __func__, ep); + dst_confirm(ep->dst); + + spin_lock_irqsave(&ep->com.lock, flags); + switch (ep->com.state) { + case MPA_REQ_WAIT: + __state_set(&ep->com, CLOSING); + break; + case MPA_REQ_SENT: + __state_set(&ep->com, CLOSING); + connect_reply_upcall(ep, -ECONNRESET); + break; + case MPA_REQ_RCVD: + + /* + * We're gonna mark this puppy DEAD, but keep + * the reference on it until the ULP accepts or + * rejects the CR. Also wake up anyone waiting + * in rdma connection migration (see iwch_accept_cr()). + */ + __state_set(&ep->com, CLOSING); + ep->com.rpl_done = 1; + ep->com.rpl_err = -ECONNRESET; + PDBG("waking up ep %p\n", ep); + wake_up(&ep->com.waitq); + break; + case MPA_REP_SENT: + __state_set(&ep->com, CLOSING); + ep->com.rpl_done = 1; + ep->com.rpl_err = -ECONNRESET; + PDBG("waking up ep %p\n", ep); + wake_up(&ep->com.waitq); + break; + case FPDU_MODE: + start_ep_timer(ep); + __state_set(&ep->com, CLOSING); + attrs.next_state = IWCH_QP_STATE_CLOSING; + iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, + IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); + peer_close_upcall(ep); + break; + case ABORTING: + disconnect = 0; + break; + case CLOSING: + __state_set(&ep->com, MORIBUND); + disconnect = 0; + break; + case MORIBUND: + stop_ep_timer(ep); + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = IWCH_QP_STATE_IDLE; + iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, + IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); + } + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + disconnect = 0; + break; + case DEAD: + disconnect = 0; + break; + default: + BUG_ON(1); + } + spin_unlock_irqrestore(&ep->com.lock, flags); + if (disconnect) + iwch_ep_disconnect(ep, 0, GFP_KERNEL); + if (release) + release_ep_resources(ep); + return CPL_RET_BUF_DONE; +} + +/* + * Returns whether an ABORT_REQ_RSS message is a negative advice. + */ +static int is_neg_adv_abort(unsigned int status) +{ + return status == CPL_ERR_RTX_NEG_ADVICE || + status == CPL_ERR_PERSIST_NEG_ADVICE; +} + +static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct cpl_abort_req_rss *req = cplhdr(skb); + struct iwch_ep *ep = ctx; + struct cpl_abort_rpl *rpl; + struct sk_buff *rpl_skb; + struct iwch_qp_attributes attrs; + int ret; + int release = 0; + unsigned long flags; + + if (is_neg_adv_abort(req->status)) { + PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep, + ep->hwtid); + t3_l2t_send_event(ep->com.tdev, ep->l2t); + return CPL_RET_BUF_DONE; + } + + /* + * We get 2 peer aborts from the HW. The first one must + * be ignored except for scribbling that we need one more. + */ + if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) { + return CPL_RET_BUF_DONE; + } + + spin_lock_irqsave(&ep->com.lock, flags); + PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state); + switch (ep->com.state) { + case CONNECTING: + break; + case MPA_REQ_WAIT: + stop_ep_timer(ep); + break; + case MPA_REQ_SENT: + stop_ep_timer(ep); + connect_reply_upcall(ep, -ECONNRESET); + break; + case MPA_REP_SENT: + ep->com.rpl_done = 1; + ep->com.rpl_err = -ECONNRESET; + PDBG("waking up ep %p\n", ep); + wake_up(&ep->com.waitq); + break; + case MPA_REQ_RCVD: + + /* + * We're gonna mark this puppy DEAD, but keep + * the reference on it until the ULP accepts or + * rejects the CR. Also wake up anyone waiting + * in rdma connection migration (see iwch_accept_cr()). + */ + ep->com.rpl_done = 1; + ep->com.rpl_err = -ECONNRESET; + PDBG("waking up ep %p\n", ep); + wake_up(&ep->com.waitq); + break; + case MORIBUND: + case CLOSING: + stop_ep_timer(ep); + /*FALLTHROUGH*/ + case FPDU_MODE: + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = IWCH_QP_STATE_ERROR; + ret = iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + if (ret) + printk(KERN_ERR MOD + "%s - qp <- error failed!\n", + __func__); + } + peer_abort_upcall(ep); + break; + case ABORTING: + break; + case DEAD: + PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); + spin_unlock_irqrestore(&ep->com.lock, flags); + return CPL_RET_BUF_DONE; + default: + BUG_ON(1); + break; + } + dst_confirm(ep->dst); + if (ep->com.state != ABORTING) { + __state_set(&ep->com, DEAD); + release = 1; + } + spin_unlock_irqrestore(&ep->com.lock, flags); + + rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); + if (!rpl_skb) { + printk(KERN_ERR MOD "%s - cannot allocate skb!\n", + __func__); + release = 1; + goto out; + } + rpl_skb->priority = CPL_PRIORITY_DATA; + rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); + rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); + rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); + OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); + rpl->cmd = CPL_ABORT_NO_RST; + iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb); +out: + if (release) + release_ep_resources(ep); + return CPL_RET_BUF_DONE; +} + +static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct iwch_ep *ep = ctx; + struct iwch_qp_attributes attrs; + unsigned long flags; + int release = 0; + + PDBG("%s ep %p\n", __func__, ep); + BUG_ON(!ep); + + /* The cm_id may be null if we failed to connect */ + spin_lock_irqsave(&ep->com.lock, flags); + switch (ep->com.state) { + case CLOSING: + __state_set(&ep->com, MORIBUND); + break; + case MORIBUND: + stop_ep_timer(ep); + if ((ep->com.cm_id) && (ep->com.qp)) { + attrs.next_state = IWCH_QP_STATE_IDLE; + iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, + IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + break; + case ABORTING: + case DEAD: + break; + default: + BUG_ON(1); + break; + } + spin_unlock_irqrestore(&ep->com.lock, flags); + if (release) + release_ep_resources(ep); + return CPL_RET_BUF_DONE; +} + +/* + * T3A does 3 things when a TERM is received: + * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet + * 2) generate an async event on the QP with the TERMINATE opcode + * 3) post a TERMINATE opcde cqe into the associated CQ. + * + * For (1), we save the message in the qp for later consumer consumption. + * For (2), we move the QP into TERMINATE, post a QP event and disconnect. + * For (3), we toss the CQE in cxio_poll_cq(). + * + * terminate() handles case (1)... + */ +static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct iwch_ep *ep = ctx; + + if (state_read(&ep->com) != FPDU_MODE) + return CPL_RET_BUF_DONE; + + PDBG("%s ep %p\n", __func__, ep); + skb_pull(skb, sizeof(struct cpl_rdma_terminate)); + PDBG("%s saving %d bytes of term msg\n", __func__, skb->len); + skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer, + skb->len); + ep->com.qp->attr.terminate_msg_len = skb->len; + ep->com.qp->attr.is_terminate_local = 0; + return CPL_RET_BUF_DONE; +} + +static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct cpl_rdma_ec_status *rep = cplhdr(skb); + struct iwch_ep *ep = ctx; + + PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, + rep->status); + if (rep->status) { + struct iwch_qp_attributes attrs; + + printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n", + __func__, ep->hwtid); + stop_ep_timer(ep); + attrs.next_state = IWCH_QP_STATE_ERROR; + iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + abort_connection(ep, NULL, GFP_KERNEL); + } + return CPL_RET_BUF_DONE; +} + +static void ep_timeout(unsigned long arg) +{ + struct iwch_ep *ep = (struct iwch_ep *)arg; + struct iwch_qp_attributes attrs; + unsigned long flags; + int abort = 1; + + spin_lock_irqsave(&ep->com.lock, flags); + PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, + ep->com.state); + switch (ep->com.state) { + case MPA_REQ_SENT: + __state_set(&ep->com, ABORTING); + connect_reply_upcall(ep, -ETIMEDOUT); + break; + case MPA_REQ_WAIT: + __state_set(&ep->com, ABORTING); + break; + case CLOSING: + case MORIBUND: + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = IWCH_QP_STATE_ERROR; + iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + __state_set(&ep->com, ABORTING); + break; + default: + printk(KERN_ERR "%s unexpected state ep %p state %u\n", + __func__, ep, ep->com.state); + WARN_ON(1); + abort = 0; + } + spin_unlock_irqrestore(&ep->com.lock, flags); + if (abort) + abort_connection(ep, NULL, GFP_ATOMIC); + put_ep(&ep->com); +} + +int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ + int err; + struct iwch_ep *ep = to_ep(cm_id); + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + + if (state_read(&ep->com) == DEAD) { + put_ep(&ep->com); + return -ECONNRESET; + } + BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); + if (mpa_rev == 0) + abort_connection(ep, NULL, GFP_KERNEL); + else { + err = send_mpa_reject(ep, pdata, pdata_len); + err = iwch_ep_disconnect(ep, 0, GFP_KERNEL); + } + put_ep(&ep->com); + return 0; +} + +int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + int err; + struct iwch_qp_attributes attrs; + enum iwch_qp_attr_mask mask; + struct iwch_ep *ep = to_ep(cm_id); + struct iwch_dev *h = to_iwch_dev(cm_id->device); + struct iwch_qp *qp = get_qhp(h, conn_param->qpn); + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + if (state_read(&ep->com) == DEAD) { + err = -ECONNRESET; + goto err; + } + + BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); + BUG_ON(!qp); + + if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || + (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { + abort_connection(ep, NULL, GFP_KERNEL); + err = -EINVAL; + goto err; + } + + cm_id->add_ref(cm_id); + ep->com.cm_id = cm_id; + ep->com.qp = qp; + + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + + if (peer2peer && ep->ird == 0) + ep->ird = 1; + + PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); + + /* bind QP to EP and move to RTS */ + attrs.mpa_attr = ep->mpa_attr; + attrs.max_ird = ep->ird; + attrs.max_ord = ep->ord; + attrs.llp_stream_handle = ep; + attrs.next_state = IWCH_QP_STATE_RTS; + + /* bind QP and TID with INIT_WR */ + mask = IWCH_QP_ATTR_NEXT_STATE | + IWCH_QP_ATTR_LLP_STREAM_HANDLE | + IWCH_QP_ATTR_MPA_ATTR | + IWCH_QP_ATTR_MAX_IRD | + IWCH_QP_ATTR_MAX_ORD; + + err = iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, mask, &attrs, 1); + if (err) + goto err1; + + /* if needed, wait for wr_ack */ + if (iwch_rqes_posted(qp)) { + wait_event(ep->com.waitq, ep->com.rpl_done); + err = ep->com.rpl_err; + if (err) + goto err1; + } + + err = send_mpa_reply(ep, conn_param->private_data, + conn_param->private_data_len); + if (err) + goto err1; + + + state_set(&ep->com, FPDU_MODE); + established_upcall(ep); + put_ep(&ep->com); + return 0; +err1: + ep->com.cm_id = NULL; + ep->com.qp = NULL; + cm_id->rem_ref(cm_id); +err: + put_ep(&ep->com); + return err; +} + +static int is_loopback_dst(struct iw_cm_id *cm_id) +{ + struct net_device *dev; + + dev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr); + if (!dev) + return 0; + dev_put(dev); + return 1; +} + +int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + struct iwch_dev *h = to_iwch_dev(cm_id->device); + struct iwch_ep *ep; + struct rtable *rt; + int err = 0; + + if (is_loopback_dst(cm_id)) { + err = -ENOSYS; + goto out; + } + + ep = alloc_ep(sizeof(*ep), GFP_KERNEL); + if (!ep) { + printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__); + err = -ENOMEM; + goto out; + } + init_timer(&ep->timer); + ep->plen = conn_param->private_data_len; + if (ep->plen) + memcpy(ep->mpa_pkt + sizeof(struct mpa_message), + conn_param->private_data, ep->plen); + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + + if (peer2peer && ep->ord == 0) + ep->ord = 1; + + ep->com.tdev = h->rdev.t3cdev_p; + + cm_id->add_ref(cm_id); + ep->com.cm_id = cm_id; + ep->com.qp = get_qhp(h, conn_param->qpn); + BUG_ON(!ep->com.qp); + PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, + ep->com.qp, cm_id); + + /* + * Allocate an active TID to initiate a TCP connection. + */ + ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep); + if (ep->atid == -1) { + printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); + err = -ENOMEM; + goto fail2; + } + + /* find a route */ + rt = find_route(h->rdev.t3cdev_p, + cm_id->local_addr.sin_addr.s_addr, + cm_id->remote_addr.sin_addr.s_addr, + cm_id->local_addr.sin_port, + cm_id->remote_addr.sin_port, IPTOS_LOWDELAY); + if (!rt) { + printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); + err = -EHOSTUNREACH; + goto fail3; + } + ep->dst = &rt->dst; + ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL); + if (!ep->l2t) { + printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); + err = -ENOMEM; + goto fail4; + } + + state_set(&ep->com, CONNECTING); + ep->tos = IPTOS_LOWDELAY; + ep->com.local_addr = cm_id->local_addr; + ep->com.remote_addr = cm_id->remote_addr; + + /* send connect request to rnic */ + err = send_connect(ep); + if (!err) + goto out; + + l2t_release(h->rdev.t3cdev_p, ep->l2t); +fail4: + dst_release(ep->dst); +fail3: + cxgb3_free_atid(ep->com.tdev, ep->atid); +fail2: + cm_id->rem_ref(cm_id); + put_ep(&ep->com); +out: + return err; +} + +int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) +{ + int err = 0; + struct iwch_dev *h = to_iwch_dev(cm_id->device); + struct iwch_listen_ep *ep; + + + might_sleep(); + + ep = alloc_ep(sizeof(*ep), GFP_KERNEL); + if (!ep) { + printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__); + err = -ENOMEM; + goto fail1; + } + PDBG("%s ep %p\n", __func__, ep); + ep->com.tdev = h->rdev.t3cdev_p; + cm_id->add_ref(cm_id); + ep->com.cm_id = cm_id; + ep->backlog = backlog; + ep->com.local_addr = cm_id->local_addr; + + /* + * Allocate a server TID. + */ + ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep); + if (ep->stid == -1) { + printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); + err = -ENOMEM; + goto fail2; + } + + state_set(&ep->com, LISTEN); + err = listen_start(ep); + if (err) + goto fail3; + + /* wait for pass_open_rpl */ + wait_event(ep->com.waitq, ep->com.rpl_done); + err = ep->com.rpl_err; + if (!err) { + cm_id->provider_data = ep; + goto out; + } +fail3: + cxgb3_free_stid(ep->com.tdev, ep->stid); +fail2: + cm_id->rem_ref(cm_id); + put_ep(&ep->com); +fail1: +out: + return err; +} + +int iwch_destroy_listen(struct iw_cm_id *cm_id) +{ + int err; + struct iwch_listen_ep *ep = to_listen_ep(cm_id); + + PDBG("%s ep %p\n", __func__, ep); + + might_sleep(); + state_set(&ep->com, DEAD); + ep->com.rpl_done = 0; + ep->com.rpl_err = 0; + err = listen_stop(ep); + if (err) + goto done; + wait_event(ep->com.waitq, ep->com.rpl_done); + cxgb3_free_stid(ep->com.tdev, ep->stid); +done: + err = ep->com.rpl_err; + cm_id->rem_ref(cm_id); + put_ep(&ep->com); + return err; +} + +int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) +{ + int ret=0; + unsigned long flags; + int close = 0; + int fatal = 0; + struct t3cdev *tdev; + struct cxio_rdev *rdev; + + spin_lock_irqsave(&ep->com.lock, flags); + + PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, + states[ep->com.state], abrupt); + + tdev = (struct t3cdev *)ep->com.tdev; + rdev = (struct cxio_rdev *)tdev->ulp; + if (cxio_fatal_error(rdev)) { + fatal = 1; + close_complete_upcall(ep); + ep->com.state = DEAD; + } + switch (ep->com.state) { + case MPA_REQ_WAIT: + case MPA_REQ_SENT: + case MPA_REQ_RCVD: + case MPA_REP_SENT: + case FPDU_MODE: + close = 1; + if (abrupt) + ep->com.state = ABORTING; + else { + ep->com.state = CLOSING; + start_ep_timer(ep); + } + set_bit(CLOSE_SENT, &ep->com.flags); + break; + case CLOSING: + if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { + close = 1; + if (abrupt) { + stop_ep_timer(ep); + ep->com.state = ABORTING; + } else + ep->com.state = MORIBUND; + } + break; + case MORIBUND: + case ABORTING: + case DEAD: + PDBG("%s ignoring disconnect ep %p state %u\n", + __func__, ep, ep->com.state); + break; + default: + BUG(); + break; + } + + spin_unlock_irqrestore(&ep->com.lock, flags); + if (close) { + if (abrupt) + ret = send_abort(ep, NULL, gfp); + else + ret = send_halfclose(ep, gfp); + if (ret) + fatal = 1; + } + if (fatal) + release_ep_resources(ep); + return ret; +} + +int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, + struct l2t_entry *l2t) +{ + struct iwch_ep *ep = ctx; + + if (ep->dst != old) + return 0; + + PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new, + l2t); + dst_hold(new); + l2t_release(ep->com.tdev, ep->l2t); + ep->l2t = l2t; + dst_release(old); + ep->dst = new; + return 1; +} + +/* + * All the CM events are handled on a work queue to have a safe context. + * These are the real handlers that are called from the work queue. + */ +static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = { + [CPL_ACT_ESTABLISH] = act_establish, + [CPL_ACT_OPEN_RPL] = act_open_rpl, + [CPL_RX_DATA] = rx_data, + [CPL_TX_DMA_ACK] = tx_ack, + [CPL_ABORT_RPL_RSS] = abort_rpl, + [CPL_ABORT_RPL] = abort_rpl, + [CPL_PASS_OPEN_RPL] = pass_open_rpl, + [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl, + [CPL_PASS_ACCEPT_REQ] = pass_accept_req, + [CPL_PASS_ESTABLISH] = pass_establish, + [CPL_PEER_CLOSE] = peer_close, + [CPL_ABORT_REQ_RSS] = peer_abort, + [CPL_CLOSE_CON_RPL] = close_con_rpl, + [CPL_RDMA_TERMINATE] = terminate, + [CPL_RDMA_EC_STATUS] = ec_status, +}; + +static void process_work(struct work_struct *work) +{ + struct sk_buff *skb = NULL; + void *ep; + struct t3cdev *tdev; + int ret; + + while ((skb = skb_dequeue(&rxq))) { + ep = *((void **) (skb->cb)); + tdev = *((struct t3cdev **) (skb->cb + sizeof(void *))); + ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep); + if (ret & CPL_RET_BUF_DONE) + kfree_skb(skb); + + /* + * ep was referenced in sched(), and is freed here. + */ + put_ep((struct iwch_ep_common *)ep); + } +} + +static DECLARE_WORK(skb_work, process_work); + +static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct iwch_ep_common *epc = ctx; + + get_ep(epc); + + /* + * Save ctx and tdev in the skb->cb area. + */ + *((void **) skb->cb) = ctx; + *((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev; + + /* + * Queue the skb and schedule the worker thread. + */ + skb_queue_tail(&rxq, skb); + queue_work(workq, &skb_work); + return 0; +} + +static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct cpl_set_tcb_rpl *rpl = cplhdr(skb); + + if (rpl->status != CPL_ERR_NONE) { + printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u " + "for tid %u\n", rpl->status, GET_TID(rpl)); + } + return CPL_RET_BUF_DONE; +} + +/* + * All upcalls from the T3 Core go to sched() to schedule the + * processing on a work queue. + */ +cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = { + [CPL_ACT_ESTABLISH] = sched, + [CPL_ACT_OPEN_RPL] = sched, + [CPL_RX_DATA] = sched, + [CPL_TX_DMA_ACK] = sched, + [CPL_ABORT_RPL_RSS] = sched, + [CPL_ABORT_RPL] = sched, + [CPL_PASS_OPEN_RPL] = sched, + [CPL_CLOSE_LISTSRV_RPL] = sched, + [CPL_PASS_ACCEPT_REQ] = sched, + [CPL_PASS_ESTABLISH] = sched, + [CPL_PEER_CLOSE] = sched, + [CPL_CLOSE_CON_RPL] = sched, + [CPL_ABORT_REQ_RSS] = sched, + [CPL_RDMA_TERMINATE] = sched, + [CPL_RDMA_EC_STATUS] = sched, + [CPL_SET_TCB_RPL] = set_tcb_rpl, +}; + +int __init iwch_cm_init(void) +{ + skb_queue_head_init(&rxq); + + workq = create_singlethread_workqueue("iw_cxgb3"); + if (!workq) + return -ENOMEM; + + return 0; +} + +void __exit iwch_cm_term(void) +{ + flush_workqueue(workq); + destroy_workqueue(workq); +} diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h new file mode 100644 index 00000000..b9efadff --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _IWCH_CM_H_ +#define _IWCH_CM_H_ + +#include +#include +#include +#include + +#include +#include + +#include "cxgb3_offload.h" +#include "iwch_provider.h" + +#define MPA_KEY_REQ "MPA ID Req Frame" +#define MPA_KEY_REP "MPA ID Rep Frame" + +#define MPA_MAX_PRIVATE_DATA 256 +#define MPA_REV 0 /* XXX - amso1100 uses rev 0 ! */ +#define MPA_REJECT 0x20 +#define MPA_CRC 0x40 +#define MPA_MARKERS 0x80 +#define MPA_FLAGS_MASK 0xE0 + +#define put_ep(ep) { \ + PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ + ep, atomic_read(&((ep)->kref.refcount))); \ + WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \ + kref_put(&((ep)->kref), __free_ep); \ +} + +#define get_ep(ep) { \ + PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \ + ep, atomic_read(&((ep)->kref.refcount))); \ + kref_get(&((ep)->kref)); \ +} + +struct mpa_message { + u8 key[16]; + u8 flags; + u8 revision; + __be16 private_data_size; + u8 private_data[0]; +}; + +struct terminate_message { + u8 layer_etype; + u8 ecode; + __be16 hdrct_rsvd; + u8 len_hdrs[0]; +}; + +#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28) + +enum iwch_layers_types { + LAYER_RDMAP = 0x00, + LAYER_DDP = 0x10, + LAYER_MPA = 0x20, + RDMAP_LOCAL_CATA = 0x00, + RDMAP_REMOTE_PROT = 0x01, + RDMAP_REMOTE_OP = 0x02, + DDP_LOCAL_CATA = 0x00, + DDP_TAGGED_ERR = 0x01, + DDP_UNTAGGED_ERR = 0x02, + DDP_LLP = 0x03 +}; + +enum iwch_rdma_ecodes { + RDMAP_INV_STAG = 0x00, + RDMAP_BASE_BOUNDS = 0x01, + RDMAP_ACC_VIOL = 0x02, + RDMAP_STAG_NOT_ASSOC = 0x03, + RDMAP_TO_WRAP = 0x04, + RDMAP_INV_VERS = 0x05, + RDMAP_INV_OPCODE = 0x06, + RDMAP_STREAM_CATA = 0x07, + RDMAP_GLOBAL_CATA = 0x08, + RDMAP_CANT_INV_STAG = 0x09, + RDMAP_UNSPECIFIED = 0xff +}; + +enum iwch_ddp_ecodes { + DDPT_INV_STAG = 0x00, + DDPT_BASE_BOUNDS = 0x01, + DDPT_STAG_NOT_ASSOC = 0x02, + DDPT_TO_WRAP = 0x03, + DDPT_INV_VERS = 0x04, + DDPU_INV_QN = 0x01, + DDPU_INV_MSN_NOBUF = 0x02, + DDPU_INV_MSN_RANGE = 0x03, + DDPU_INV_MO = 0x04, + DDPU_MSG_TOOBIG = 0x05, + DDPU_INV_VERS = 0x06 +}; + +enum iwch_mpa_ecodes { + MPA_CRC_ERR = 0x02, + MPA_MARKER_ERR = 0x03 +}; + +enum iwch_ep_state { + IDLE = 0, + LISTEN, + CONNECTING, + MPA_REQ_WAIT, + MPA_REQ_SENT, + MPA_REQ_RCVD, + MPA_REP_SENT, + FPDU_MODE, + ABORTING, + CLOSING, + MORIBUND, + DEAD, +}; + +enum iwch_ep_flags { + PEER_ABORT_IN_PROGRESS = 0, + ABORT_REQ_IN_PROGRESS = 1, + RELEASE_RESOURCES = 2, + CLOSE_SENT = 3, +}; + +struct iwch_ep_common { + struct iw_cm_id *cm_id; + struct iwch_qp *qp; + struct t3cdev *tdev; + enum iwch_ep_state state; + struct kref kref; + spinlock_t lock; + struct sockaddr_in local_addr; + struct sockaddr_in remote_addr; + wait_queue_head_t waitq; + int rpl_done; + int rpl_err; + unsigned long flags; +}; + +struct iwch_listen_ep { + struct iwch_ep_common com; + unsigned int stid; + int backlog; +}; + +struct iwch_ep { + struct iwch_ep_common com; + struct iwch_ep *parent_ep; + struct timer_list timer; + unsigned int atid; + u32 hwtid; + u32 snd_seq; + u32 rcv_seq; + struct l2t_entry *l2t; + struct dst_entry *dst; + struct sk_buff *mpa_skb; + struct iwch_mpa_attributes mpa_attr; + unsigned int mpa_pkt_len; + u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA]; + u8 tos; + u16 emss; + u16 plen; + u32 ird; + u32 ord; +}; + +static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id) +{ + return cm_id->provider_data; +} + +static inline struct iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) +{ + return cm_id->provider_data; +} + +static inline int compute_wscale(int win) +{ + int wscale = 0; + + while (wscale < 14 && (65535<cq); + + if (!rd_cqe) + return 0; + + qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); + if (!qhp) + wq = NULL; + else { + spin_lock(&qhp->lock); + wq = &(qhp->wq); + } + ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, + &credit); + if (t3a_device(chp->rhp) && credit) { + PDBG("%s updating %d cq credits on id %d\n", __func__, + credit, chp->cq.cqid); + cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit); + } + + if (ret) { + ret = -EAGAIN; + goto out; + } + ret = 1; + + wc->wr_id = cookie; + wc->qp = &qhp->ibqp; + wc->vendor_err = CQE_STATUS(cqe); + wc->wc_flags = 0; + + PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " + "lo 0x%x cookie 0x%llx\n", __func__, + CQE_QPID(cqe), CQE_TYPE(cqe), + CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe), + CQE_WRID_LOW(cqe), (unsigned long long) cookie); + + if (CQE_TYPE(cqe) == 0) { + if (!CQE_STATUS(cqe)) + wc->byte_len = CQE_LEN(cqe); + else + wc->byte_len = 0; + wc->opcode = IB_WC_RECV; + if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV || + CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) { + wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe); + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + } + } else { + switch (CQE_OPCODE(cqe)) { + case T3_RDMA_WRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case T3_READ_REQ: + wc->opcode = IB_WC_RDMA_READ; + wc->byte_len = CQE_LEN(cqe); + break; + case T3_SEND: + case T3_SEND_WITH_SE: + case T3_SEND_WITH_INV: + case T3_SEND_WITH_SE_INV: + wc->opcode = IB_WC_SEND; + break; + case T3_BIND_MW: + wc->opcode = IB_WC_BIND_MW; + break; + + case T3_LOCAL_INV: + wc->opcode = IB_WC_LOCAL_INV; + break; + case T3_FAST_REGISTER: + wc->opcode = IB_WC_FAST_REG_MR; + break; + default: + printk(KERN_ERR MOD "Unexpected opcode %d " + "in the CQE received for QPID=0x%0x\n", + CQE_OPCODE(cqe), CQE_QPID(cqe)); + ret = -EINVAL; + goto out; + } + } + + if (cqe_flushed) + wc->status = IB_WC_WR_FLUSH_ERR; + else { + + switch (CQE_STATUS(cqe)) { + case TPT_ERR_SUCCESS: + wc->status = IB_WC_SUCCESS; + break; + case TPT_ERR_STAG: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case TPT_ERR_PDID: + wc->status = IB_WC_LOC_PROT_ERR; + break; + case TPT_ERR_QPID: + case TPT_ERR_ACCESS: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case TPT_ERR_WRAP: + wc->status = IB_WC_GENERAL_ERR; + break; + case TPT_ERR_BOUND: + wc->status = IB_WC_LOC_LEN_ERR; + break; + case TPT_ERR_INVALIDATE_SHARED_MR: + case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: + wc->status = IB_WC_MW_BIND_ERR; + break; + case TPT_ERR_CRC: + case TPT_ERR_MARKER: + case TPT_ERR_PDU_LEN_ERR: + case TPT_ERR_OUT_OF_RQE: + case TPT_ERR_DDP_VERSION: + case TPT_ERR_RDMA_VERSION: + case TPT_ERR_DDP_QUEUE_NUM: + case TPT_ERR_MSN: + case TPT_ERR_TBIT: + case TPT_ERR_MO: + case TPT_ERR_MSN_RANGE: + case TPT_ERR_IRD_OVERFLOW: + case TPT_ERR_OPCODE: + wc->status = IB_WC_FATAL_ERR; + break; + case TPT_ERR_SWFLUSH: + wc->status = IB_WC_WR_FLUSH_ERR; + break; + default: + printk(KERN_ERR MOD "Unexpected cqe_status 0x%x for " + "QPID=0x%0x\n", CQE_STATUS(cqe), CQE_QPID(cqe)); + ret = -EINVAL; + } + } +out: + if (wq) + spin_unlock(&qhp->lock); + return ret; +} + +int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct iwch_dev *rhp; + struct iwch_cq *chp; + unsigned long flags; + int npolled; + int err = 0; + + chp = to_iwch_cq(ibcq); + rhp = chp->rhp; + + spin_lock_irqsave(&chp->lock, flags); + for (npolled = 0; npolled < num_entries; ++npolled) { +#ifdef DEBUG + int i=0; +#endif + + /* + * Because T3 can post CQEs that are _not_ associated + * with a WR, we might have to poll again after removing + * one of these. + */ + do { + err = iwch_poll_cq_one(rhp, chp, wc + npolled); +#ifdef DEBUG + BUG_ON(++i > 1000); +#endif + } while (err == -EAGAIN); + if (err <= 0) + break; + } + spin_unlock_irqrestore(&chp->lock, flags); + + if (err < 0) + return err; + else { + return npolled; + } +} diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c new file mode 100644 index 00000000..abcc9e76 --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include "iwch_provider.h" +#include "iwch.h" +#include "iwch_cm.h" +#include "cxio_hal.h" +#include "cxio_wr.h" + +static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, + struct respQ_msg_t *rsp_msg, + enum ib_event_type ib_event, + int send_term) +{ + struct ib_event event; + struct iwch_qp_attributes attrs; + struct iwch_qp *qhp; + unsigned long flag; + + spin_lock(&rnicp->lock); + qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe)); + + if (!qhp) { + printk(KERN_ERR "%s unaffiliated error 0x%x qpid 0x%x\n", + __func__, CQE_STATUS(rsp_msg->cqe), + CQE_QPID(rsp_msg->cqe)); + spin_unlock(&rnicp->lock); + return; + } + + if ((qhp->attr.state == IWCH_QP_STATE_ERROR) || + (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) { + PDBG("%s AE received after RTS - " + "qp state %d qpid 0x%x status 0x%x\n", __func__, + qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe)); + spin_unlock(&rnicp->lock); + return; + } + + printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x " + "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __func__, + CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), + CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), + CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); + + atomic_inc(&qhp->refcnt); + spin_unlock(&rnicp->lock); + + if (qhp->attr.state == IWCH_QP_STATE_RTS) { + attrs.next_state = IWCH_QP_STATE_TERMINATE; + iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + if (send_term) + iwch_post_terminate(qhp, rsp_msg); + } + + event.event = ib_event; + event.device = chp->ibcq.device; + if (ib_event == IB_EVENT_CQ_ERR) + event.element.cq = &chp->ibcq; + else + event.element.qp = &qhp->ibqp; + + if (qhp->ibqp.event_handler) + (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); + + spin_lock_irqsave(&chp->comp_handler_lock, flag); + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); + + if (atomic_dec_and_test(&qhp->refcnt)) + wake_up(&qhp->wait); +} + +void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) +{ + struct iwch_dev *rnicp; + struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data; + struct iwch_cq *chp; + struct iwch_qp *qhp; + u32 cqid = RSPQ_CQID(rsp_msg); + unsigned long flag; + + rnicp = (struct iwch_dev *) rdev_p->ulp; + spin_lock(&rnicp->lock); + chp = get_chp(rnicp, cqid); + qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe)); + if (!chp || !qhp) { + printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d " + "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x \n", + cqid, CQE_QPID(rsp_msg->cqe), + CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), + CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), + CQE_WRID_LOW(rsp_msg->cqe)); + spin_unlock(&rnicp->lock); + goto out; + } + iwch_qp_add_ref(&qhp->ibqp); + atomic_inc(&chp->refcnt); + spin_unlock(&rnicp->lock); + + /* + * 1) completion of our sending a TERMINATE. + * 2) incoming TERMINATE message. + */ + if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) && + (CQE_STATUS(rsp_msg->cqe) == 0)) { + if (SQ_TYPE(rsp_msg->cqe)) { + PDBG("%s QPID 0x%x ep %p disconnecting\n", + __func__, qhp->wq.qpid, qhp->ep); + iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC); + } else { + PDBG("%s post REQ_ERR AE QPID 0x%x\n", __func__, + qhp->wq.qpid); + post_qp_event(rnicp, chp, rsp_msg, + IB_EVENT_QP_REQ_ERR, 0); + iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC); + } + goto done; + } + + /* Bad incoming Read request */ + if (SQ_TYPE(rsp_msg->cqe) && + (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) { + post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); + goto done; + } + + /* Bad incoming write */ + if (RQ_TYPE(rsp_msg->cqe) && + (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) { + post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); + goto done; + } + + switch (CQE_STATUS(rsp_msg->cqe)) { + + /* Completion Events */ + case TPT_ERR_SUCCESS: + + /* + * Confirm the destination entry if this is a RECV completion. + */ + if (qhp->ep && SQ_TYPE(rsp_msg->cqe)) + dst_confirm(qhp->ep->dst); + spin_lock_irqsave(&chp->comp_handler_lock, flag); + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); + break; + + case TPT_ERR_STAG: + case TPT_ERR_PDID: + case TPT_ERR_QPID: + case TPT_ERR_ACCESS: + case TPT_ERR_WRAP: + case TPT_ERR_BOUND: + case TPT_ERR_INVALIDATE_SHARED_MR: + case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: + post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1); + break; + + /* Device Fatal Errors */ + case TPT_ERR_ECC: + case TPT_ERR_ECC_PSTAG: + case TPT_ERR_INTERNAL_ERR: + post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1); + break; + + /* QP Fatal Errors */ + case TPT_ERR_OUT_OF_RQE: + case TPT_ERR_PBL_ADDR_BOUND: + case TPT_ERR_CRC: + case TPT_ERR_MARKER: + case TPT_ERR_PDU_LEN_ERR: + case TPT_ERR_DDP_VERSION: + case TPT_ERR_RDMA_VERSION: + case TPT_ERR_OPCODE: + case TPT_ERR_DDP_QUEUE_NUM: + case TPT_ERR_MSN: + case TPT_ERR_TBIT: + case TPT_ERR_MO: + case TPT_ERR_MSN_GAP: + case TPT_ERR_MSN_RANGE: + case TPT_ERR_RQE_ADDR_BOUND: + case TPT_ERR_IRD_OVERFLOW: + post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); + break; + + default: + printk(KERN_ERR MOD "Unknown T3 status 0x%x QPID 0x%x\n", + CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid); + post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); + break; + } +done: + if (atomic_dec_and_test(&chp->refcnt)) + wake_up(&chp->wait); + iwch_qp_rem_ref(&qhp->ibqp); +out: + dev_kfree_skb_irq(skb); +} diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c new file mode 100644 index 00000000..5c36ee28 --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include + +#include +#include + +#include "cxio_hal.h" +#include "cxio_resource.h" +#include "iwch.h" +#include "iwch_provider.h" + +static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) +{ + u32 mmid; + + mhp->attr.state = 1; + mhp->attr.stag = stag; + mmid = stag >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); + return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); +} + +int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, + struct iwch_mr *mhp, int shift) +{ + u32 stag; + int ret; + + if (cxio_register_phys_mem(&rhp->rdev, + &stag, mhp->attr.pdid, + mhp->attr.perms, + mhp->attr.zbva, + mhp->attr.va_fbo, + mhp->attr.len, + shift - 12, + mhp->attr.pbl_size, mhp->attr.pbl_addr)) + return -ENOMEM; + + ret = iwch_finish_mem_reg(mhp, stag); + if (ret) + cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); + return ret; +} + +int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, + struct iwch_mr *mhp, + int shift, + int npages) +{ + u32 stag; + int ret; + + /* We could support this... */ + if (npages > mhp->attr.pbl_size) + return -ENOMEM; + + stag = mhp->attr.stag; + if (cxio_reregister_phys_mem(&rhp->rdev, + &stag, mhp->attr.pdid, + mhp->attr.perms, + mhp->attr.zbva, + mhp->attr.va_fbo, + mhp->attr.len, + shift - 12, + mhp->attr.pbl_size, mhp->attr.pbl_addr)) + return -ENOMEM; + + ret = iwch_finish_mem_reg(mhp, stag); + if (ret) + cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); + + return ret; +} + +int iwch_alloc_pbl(struct iwch_mr *mhp, int npages) +{ + mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev, + npages << 3); + + if (!mhp->attr.pbl_addr) + return -ENOMEM; + + mhp->attr.pbl_size = npages; + + return 0; +} + +void iwch_free_pbl(struct iwch_mr *mhp) +{ + cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, + mhp->attr.pbl_size << 3); +} + +int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset) +{ + return cxio_write_pbl(&mhp->rhp->rdev, pages, + mhp->attr.pbl_addr + (offset << 3), npages); +} + +int build_phys_page_list(struct ib_phys_buf *buffer_list, + int num_phys_buf, + u64 *iova_start, + u64 *total_size, + int *npages, + int *shift, + __be64 **page_list) +{ + u64 mask; + int i, j, n; + + mask = 0; + *total_size = 0; + for (i = 0; i < num_phys_buf; ++i) { + if (i != 0 && buffer_list[i].addr & ~PAGE_MASK) + return -EINVAL; + if (i != 0 && i != num_phys_buf - 1 && + (buffer_list[i].size & ~PAGE_MASK)) + return -EINVAL; + *total_size += buffer_list[i].size; + if (i > 0) + mask |= buffer_list[i].addr; + else + mask |= buffer_list[i].addr & PAGE_MASK; + if (i != num_phys_buf - 1) + mask |= buffer_list[i].addr + buffer_list[i].size; + else + mask |= (buffer_list[i].addr + buffer_list[i].size + + PAGE_SIZE - 1) & PAGE_MASK; + } + + if (*total_size > 0xFFFFFFFFULL) + return -ENOMEM; + + /* Find largest page shift we can use to cover buffers */ + for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift)) + if ((1ULL << *shift) & mask) + break; + + buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1); + buffer_list[0].addr &= ~0ull << *shift; + + *npages = 0; + for (i = 0; i < num_phys_buf; ++i) + *npages += (buffer_list[i].size + + (1ULL << *shift) - 1) >> *shift; + + if (!*npages) + return -EINVAL; + + *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL); + if (!*page_list) + return -ENOMEM; + + n = 0; + for (i = 0; i < num_phys_buf; ++i) + for (j = 0; + j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift; + ++j) + (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr + + ((u64) j << *shift)); + + PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n", + __func__, (unsigned long long) *iova_start, + (unsigned long long) mask, *shift, (unsigned long long) *total_size, + *npages); + + return 0; + +} diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c new file mode 100644 index 00000000..0bdf09aa --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -0,0 +1,1468 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "cxio_hal.h" +#include "iwch.h" +#include "iwch_provider.h" +#include "iwch_cm.h" +#include "iwch_user.h" +#include "common.h" + +static struct ib_ah *iwch_ah_create(struct ib_pd *pd, + struct ib_ah_attr *ah_attr) +{ + return ERR_PTR(-ENOSYS); +} + +static int iwch_ah_destroy(struct ib_ah *ah) +{ + return -ENOSYS; +} + +static int iwch_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + return -ENOSYS; +} + +static int iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + return -ENOSYS; +} + +static int iwch_process_mad(struct ib_device *ibdev, + int mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + return -ENOSYS; +} + +static int iwch_dealloc_ucontext(struct ib_ucontext *context) +{ + struct iwch_dev *rhp = to_iwch_dev(context->device); + struct iwch_ucontext *ucontext = to_iwch_ucontext(context); + struct iwch_mm_entry *mm, *tmp; + + PDBG("%s context %p\n", __func__, context); + list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) + kfree(mm); + cxio_release_ucontext(&rhp->rdev, &ucontext->uctx); + kfree(ucontext); + return 0; +} + +static struct ib_ucontext *iwch_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct iwch_ucontext *context; + struct iwch_dev *rhp = to_iwch_dev(ibdev); + + PDBG("%s ibdev %p\n", __func__, ibdev); + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + cxio_init_ucontext(&rhp->rdev, &context->uctx); + INIT_LIST_HEAD(&context->mmaps); + spin_lock_init(&context->mmap_lock); + return &context->ibucontext; +} + +static int iwch_destroy_cq(struct ib_cq *ib_cq) +{ + struct iwch_cq *chp; + + PDBG("%s ib_cq %p\n", __func__, ib_cq); + chp = to_iwch_cq(ib_cq); + + remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid); + atomic_dec(&chp->refcnt); + wait_event(chp->wait, !atomic_read(&chp->refcnt)); + + cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); + kfree(chp); + return 0; +} + +static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int vector, + struct ib_ucontext *ib_context, + struct ib_udata *udata) +{ + struct iwch_dev *rhp; + struct iwch_cq *chp; + struct iwch_create_cq_resp uresp; + struct iwch_create_cq_req ureq; + struct iwch_ucontext *ucontext = NULL; + static int warned; + size_t resplen; + + PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries); + rhp = to_iwch_dev(ibdev); + chp = kzalloc(sizeof(*chp), GFP_KERNEL); + if (!chp) + return ERR_PTR(-ENOMEM); + + if (ib_context) { + ucontext = to_iwch_ucontext(ib_context); + if (!t3a_device(rhp)) { + if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) { + kfree(chp); + return ERR_PTR(-EFAULT); + } + chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr; + } + } + + if (t3a_device(rhp)) { + + /* + * T3A: Add some fluff to handle extra CQEs inserted + * for various errors. + * Additional CQE possibilities: + * TERMINATE, + * incoming RDMA WRITE Failures + * incoming RDMA READ REQUEST FAILUREs + * NOTE: We cannot ensure the CQ won't overflow. + */ + entries += 16; + } + entries = roundup_pow_of_two(entries); + chp->cq.size_log2 = ilog2(entries); + + if (cxio_create_cq(&rhp->rdev, &chp->cq, !ucontext)) { + kfree(chp); + return ERR_PTR(-ENOMEM); + } + chp->rhp = rhp; + chp->ibcq.cqe = 1 << chp->cq.size_log2; + spin_lock_init(&chp->lock); + spin_lock_init(&chp->comp_handler_lock); + atomic_set(&chp->refcnt, 1); + init_waitqueue_head(&chp->wait); + if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) { + cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); + kfree(chp); + return ERR_PTR(-ENOMEM); + } + + if (ucontext) { + struct iwch_mm_entry *mm; + + mm = kmalloc(sizeof *mm, GFP_KERNEL); + if (!mm) { + iwch_destroy_cq(&chp->ibcq); + return ERR_PTR(-ENOMEM); + } + uresp.cqid = chp->cq.cqid; + uresp.size_log2 = chp->cq.size_log2; + spin_lock(&ucontext->mmap_lock); + uresp.key = ucontext->key; + ucontext->key += PAGE_SIZE; + spin_unlock(&ucontext->mmap_lock); + mm->key = uresp.key; + mm->addr = virt_to_phys(chp->cq.queue); + if (udata->outlen < sizeof uresp) { + if (!warned++) + printk(KERN_WARNING MOD "Warning - " + "downlevel libcxgb3 (non-fatal).\n"); + mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * + sizeof(struct t3_cqe)); + resplen = sizeof(struct iwch_create_cq_resp_v0); + } else { + mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) * + sizeof(struct t3_cqe)); + uresp.memsize = mm->len; + resplen = sizeof uresp; + } + if (ib_copy_to_udata(udata, &uresp, resplen)) { + kfree(mm); + iwch_destroy_cq(&chp->ibcq); + return ERR_PTR(-EFAULT); + } + insert_mmap(ucontext, mm); + } + PDBG("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n", + chp->cq.cqid, chp, (1 << chp->cq.size_log2), + (unsigned long long) chp->cq.dma_addr); + return &chp->ibcq; +} + +static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) +{ +#ifdef notyet + struct iwch_cq *chp = to_iwch_cq(cq); + struct t3_cq oldcq, newcq; + int ret; + + PDBG("%s ib_cq %p cqe %d\n", __func__, cq, cqe); + + /* We don't downsize... */ + if (cqe <= cq->cqe) + return 0; + + /* create new t3_cq with new size */ + cqe = roundup_pow_of_two(cqe+1); + newcq.size_log2 = ilog2(cqe); + + /* Dont allow resize to less than the current wce count */ + if (cqe < Q_COUNT(chp->cq.rptr, chp->cq.wptr)) { + return -ENOMEM; + } + + /* Quiesce all QPs using this CQ */ + ret = iwch_quiesce_qps(chp); + if (ret) { + return ret; + } + + ret = cxio_create_cq(&chp->rhp->rdev, &newcq); + if (ret) { + return ret; + } + + /* copy CQEs */ + memcpy(newcq.queue, chp->cq.queue, (1 << chp->cq.size_log2) * + sizeof(struct t3_cqe)); + + /* old iwch_qp gets new t3_cq but keeps old cqid */ + oldcq = chp->cq; + chp->cq = newcq; + chp->cq.cqid = oldcq.cqid; + + /* resize new t3_cq to update the HW context */ + ret = cxio_resize_cq(&chp->rhp->rdev, &chp->cq); + if (ret) { + chp->cq = oldcq; + return ret; + } + chp->ibcq.cqe = (1<cq.size_log2) - 1; + + /* destroy old t3_cq */ + oldcq.cqid = newcq.cqid; + ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq); + if (ret) { + printk(KERN_ERR MOD "%s - cxio_destroy_cq failed %d\n", + __func__, ret); + } + + /* add user hooks here */ + + /* resume qps */ + ret = iwch_resume_qps(chp); + return ret; +#else + return -ENOSYS; +#endif +} + +static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct iwch_dev *rhp; + struct iwch_cq *chp; + enum t3_cq_opcode cq_op; + int err; + unsigned long flag; + u32 rptr; + + chp = to_iwch_cq(ibcq); + rhp = chp->rhp; + if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) + cq_op = CQ_ARM_SE; + else + cq_op = CQ_ARM_AN; + if (chp->user_rptr_addr) { + if (get_user(rptr, chp->user_rptr_addr)) + return -EFAULT; + spin_lock_irqsave(&chp->lock, flag); + chp->cq.rptr = rptr; + } else + spin_lock_irqsave(&chp->lock, flag); + PDBG("%s rptr 0x%x\n", __func__, chp->cq.rptr); + err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0); + spin_unlock_irqrestore(&chp->lock, flag); + if (err < 0) + printk(KERN_ERR MOD "Error %d rearming CQID 0x%x\n", err, + chp->cq.cqid); + if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) + err = 0; + return err; +} + +static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + int len = vma->vm_end - vma->vm_start; + u32 key = vma->vm_pgoff << PAGE_SHIFT; + struct cxio_rdev *rdev_p; + int ret = 0; + struct iwch_mm_entry *mm; + struct iwch_ucontext *ucontext; + u64 addr; + + PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff, + key, len); + + if (vma->vm_start & (PAGE_SIZE-1)) { + return -EINVAL; + } + + rdev_p = &(to_iwch_dev(context->device)->rdev); + ucontext = to_iwch_ucontext(context); + + mm = remove_mmap(ucontext, key, len); + if (!mm) + return -EINVAL; + addr = mm->addr; + kfree(mm); + + if ((addr >= rdev_p->rnic_info.udbell_physbase) && + (addr < (rdev_p->rnic_info.udbell_physbase + + rdev_p->rnic_info.udbell_len))) { + + /* + * Map T3 DB register. + */ + if (vma->vm_flags & VM_READ) { + return -EPERM; + } + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; + vma->vm_flags &= ~VM_MAYREAD; + ret = io_remap_pfn_range(vma, vma->vm_start, + addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + } else { + + /* + * Map WQ or CQ contig dma memory... + */ + ret = remap_pfn_range(vma, vma->vm_start, + addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + } + + return ret; +} + +static int iwch_deallocate_pd(struct ib_pd *pd) +{ + struct iwch_dev *rhp; + struct iwch_pd *php; + + php = to_iwch_pd(pd); + rhp = php->rhp; + PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid); + cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid); + kfree(php); + return 0; +} + +static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct iwch_pd *php; + u32 pdid; + struct iwch_dev *rhp; + + PDBG("%s ibdev %p\n", __func__, ibdev); + rhp = (struct iwch_dev *) ibdev; + pdid = cxio_hal_get_pdid(rhp->rdev.rscp); + if (!pdid) + return ERR_PTR(-EINVAL); + php = kzalloc(sizeof(*php), GFP_KERNEL); + if (!php) { + cxio_hal_put_pdid(rhp->rdev.rscp, pdid); + return ERR_PTR(-ENOMEM); + } + php->pdid = pdid; + php->rhp = rhp; + if (context) { + if (ib_copy_to_udata(udata, &php->pdid, sizeof (__u32))) { + iwch_deallocate_pd(&php->ibpd); + return ERR_PTR(-EFAULT); + } + } + PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php); + return &php->ibpd; +} + +static int iwch_dereg_mr(struct ib_mr *ib_mr) +{ + struct iwch_dev *rhp; + struct iwch_mr *mhp; + u32 mmid; + + PDBG("%s ib_mr %p\n", __func__, ib_mr); + /* There can be no memory windows */ + if (atomic_read(&ib_mr->usecnt)) + return -EINVAL; + + mhp = to_iwch_mr(ib_mr); + rhp = mhp->rhp; + mmid = mhp->attr.stag >> 8; + cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); + iwch_free_pbl(mhp); + remove_handle(rhp, &rhp->mmidr, mmid); + if (mhp->kva) + kfree((void *) (unsigned long) mhp->kva); + if (mhp->umem) + ib_umem_release(mhp->umem); + PDBG("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp); + kfree(mhp); + return 0; +} + +static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, + int acc, + u64 *iova_start) +{ + __be64 *page_list; + int shift; + u64 total_size; + int npages; + struct iwch_dev *rhp; + struct iwch_pd *php; + struct iwch_mr *mhp; + int ret; + + PDBG("%s ib_pd %p\n", __func__, pd); + php = to_iwch_pd(pd); + rhp = php->rhp; + + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + return ERR_PTR(-ENOMEM); + + mhp->rhp = rhp; + + /* First check that we have enough alignment */ + if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) { + ret = -EINVAL; + goto err; + } + + if (num_phys_buf > 1 && + ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) { + ret = -EINVAL; + goto err; + } + + ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start, + &total_size, &npages, &shift, &page_list); + if (ret) + goto err; + + ret = iwch_alloc_pbl(mhp, npages); + if (ret) { + kfree(page_list); + goto err_pbl; + } + + ret = iwch_write_pbl(mhp, page_list, npages, 0); + kfree(page_list); + if (ret) + goto err_pbl; + + mhp->attr.pdid = php->pdid; + mhp->attr.zbva = 0; + + mhp->attr.perms = iwch_ib_to_tpt_access(acc); + mhp->attr.va_fbo = *iova_start; + mhp->attr.page_size = shift - 12; + + mhp->attr.len = (u32) total_size; + mhp->attr.pbl_size = npages; + ret = iwch_register_mem(rhp, php, mhp, shift); + if (ret) + goto err_pbl; + + return &mhp->ibmr; + +err_pbl: + iwch_free_pbl(mhp); + +err: + kfree(mhp); + return ERR_PTR(ret); + +} + +static int iwch_reregister_phys_mem(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, + int acc, u64 * iova_start) +{ + + struct iwch_mr mh, *mhp; + struct iwch_pd *php; + struct iwch_dev *rhp; + __be64 *page_list = NULL; + int shift = 0; + u64 total_size; + int npages; + int ret; + + PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd); + + /* There can be no memory windows */ + if (atomic_read(&mr->usecnt)) + return -EINVAL; + + mhp = to_iwch_mr(mr); + rhp = mhp->rhp; + php = to_iwch_pd(mr->pd); + + /* make sure we are on the same adapter */ + if (rhp != php->rhp) + return -EINVAL; + + memcpy(&mh, mhp, sizeof *mhp); + + if (mr_rereg_mask & IB_MR_REREG_PD) + php = to_iwch_pd(pd); + if (mr_rereg_mask & IB_MR_REREG_ACCESS) + mh.attr.perms = iwch_ib_to_tpt_access(acc); + if (mr_rereg_mask & IB_MR_REREG_TRANS) { + ret = build_phys_page_list(buffer_list, num_phys_buf, + iova_start, + &total_size, &npages, + &shift, &page_list); + if (ret) + return ret; + } + + ret = iwch_reregister_mem(rhp, php, &mh, shift, npages); + kfree(page_list); + if (ret) { + return ret; + } + if (mr_rereg_mask & IB_MR_REREG_PD) + mhp->attr.pdid = php->pdid; + if (mr_rereg_mask & IB_MR_REREG_ACCESS) + mhp->attr.perms = iwch_ib_to_tpt_access(acc); + if (mr_rereg_mask & IB_MR_REREG_TRANS) { + mhp->attr.zbva = 0; + mhp->attr.va_fbo = *iova_start; + mhp->attr.page_size = shift - 12; + mhp->attr.len = (u32) total_size; + mhp->attr.pbl_size = npages; + } + + return 0; +} + + +static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int acc, struct ib_udata *udata) +{ + __be64 *pages; + int shift, n, len; + int i, j, k; + int err = 0; + struct ib_umem_chunk *chunk; + struct iwch_dev *rhp; + struct iwch_pd *php; + struct iwch_mr *mhp; + struct iwch_reg_user_mr_resp uresp; + + PDBG("%s ib_pd %p\n", __func__, pd); + + php = to_iwch_pd(pd); + rhp = php->rhp; + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + return ERR_PTR(-ENOMEM); + + mhp->rhp = rhp; + + mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); + if (IS_ERR(mhp->umem)) { + err = PTR_ERR(mhp->umem); + kfree(mhp); + return ERR_PTR(err); + } + + shift = ffs(mhp->umem->page_size) - 1; + + n = 0; + list_for_each_entry(chunk, &mhp->umem->chunk_list, list) + n += chunk->nents; + + err = iwch_alloc_pbl(mhp, n); + if (err) + goto err; + + pages = (__be64 *) __get_free_page(GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + goto err_pbl; + } + + i = n = 0; + + list_for_each_entry(chunk, &mhp->umem->chunk_list, list) + for (j = 0; j < chunk->nmap; ++j) { + len = sg_dma_len(&chunk->page_list[j]) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = cpu_to_be64(sg_dma_address( + &chunk->page_list[j]) + + mhp->umem->page_size * k); + if (i == PAGE_SIZE / sizeof *pages) { + err = iwch_write_pbl(mhp, pages, i, n); + if (err) + goto pbl_done; + n += i; + i = 0; + } + } + } + + if (i) + err = iwch_write_pbl(mhp, pages, i, n); + +pbl_done: + free_page((unsigned long) pages); + if (err) + goto err_pbl; + + mhp->attr.pdid = php->pdid; + mhp->attr.zbva = 0; + mhp->attr.perms = iwch_ib_to_tpt_access(acc); + mhp->attr.va_fbo = virt; + mhp->attr.page_size = shift - 12; + mhp->attr.len = (u32) length; + + err = iwch_register_mem(rhp, php, mhp, shift); + if (err) + goto err_pbl; + + if (udata && !t3a_device(rhp)) { + uresp.pbl_addr = (mhp->attr.pbl_addr - + rhp->rdev.rnic_info.pbl_base) >> 3; + PDBG("%s user resp pbl_addr 0x%x\n", __func__, + uresp.pbl_addr); + + if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { + iwch_dereg_mr(&mhp->ibmr); + err = -EFAULT; + goto err; + } + } + + return &mhp->ibmr; + +err_pbl: + iwch_free_pbl(mhp); + +err: + ib_umem_release(mhp->umem); + kfree(mhp); + return ERR_PTR(err); +} + +static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct ib_phys_buf bl; + u64 kva; + struct ib_mr *ibmr; + + PDBG("%s ib_pd %p\n", __func__, pd); + + /* + * T3 only supports 32 bits of size. + */ + bl.size = 0xffffffff; + bl.addr = 0; + kva = 0; + ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva); + return ibmr; +} + +static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd) +{ + struct iwch_dev *rhp; + struct iwch_pd *php; + struct iwch_mw *mhp; + u32 mmid; + u32 stag = 0; + int ret; + + php = to_iwch_pd(pd); + rhp = php->rhp; + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + return ERR_PTR(-ENOMEM); + ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid); + if (ret) { + kfree(mhp); + return ERR_PTR(ret); + } + mhp->rhp = rhp; + mhp->attr.pdid = php->pdid; + mhp->attr.type = TPT_MW; + mhp->attr.stag = stag; + mmid = (stag) >> 8; + mhp->ibmw.rkey = stag; + if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { + cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); + kfree(mhp); + return ERR_PTR(-ENOMEM); + } + PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); + return &(mhp->ibmw); +} + +static int iwch_dealloc_mw(struct ib_mw *mw) +{ + struct iwch_dev *rhp; + struct iwch_mw *mhp; + u32 mmid; + + mhp = to_iwch_mw(mw); + rhp = mhp->rhp; + mmid = (mw->rkey) >> 8; + cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); + remove_handle(rhp, &rhp->mmidr, mmid); + kfree(mhp); + PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); + return 0; +} + +static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) +{ + struct iwch_dev *rhp; + struct iwch_pd *php; + struct iwch_mr *mhp; + u32 mmid; + u32 stag = 0; + int ret = 0; + + php = to_iwch_pd(pd); + rhp = php->rhp; + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + goto err; + + mhp->rhp = rhp; + ret = iwch_alloc_pbl(mhp, pbl_depth); + if (ret) + goto err1; + mhp->attr.pbl_size = pbl_depth; + ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid, + mhp->attr.pbl_size, mhp->attr.pbl_addr); + if (ret) + goto err2; + mhp->attr.pdid = php->pdid; + mhp->attr.type = TPT_NON_SHARED_MR; + mhp->attr.stag = stag; + mhp->attr.state = 1; + mmid = (stag) >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) + goto err3; + + PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); + return &(mhp->ibmr); +err3: + cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); +err2: + iwch_free_pbl(mhp); +err1: + kfree(mhp); +err: + return ERR_PTR(ret); +} + +static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl( + struct ib_device *device, + int page_list_len) +{ + struct ib_fast_reg_page_list *page_list; + + page_list = kmalloc(sizeof *page_list + page_list_len * sizeof(u64), + GFP_KERNEL); + if (!page_list) + return ERR_PTR(-ENOMEM); + + page_list->page_list = (u64 *)(page_list + 1); + page_list->max_page_list_len = page_list_len; + + return page_list; +} + +static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list) +{ + kfree(page_list); +} + +static int iwch_destroy_qp(struct ib_qp *ib_qp) +{ + struct iwch_dev *rhp; + struct iwch_qp *qhp; + struct iwch_qp_attributes attrs; + struct iwch_ucontext *ucontext; + + qhp = to_iwch_qp(ib_qp); + rhp = qhp->rhp; + + attrs.next_state = IWCH_QP_STATE_ERROR; + iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0); + wait_event(qhp->wait, !qhp->ep); + + remove_handle(rhp, &rhp->qpidr, qhp->wq.qpid); + + atomic_dec(&qhp->refcnt); + wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); + + ucontext = ib_qp->uobject ? to_iwch_ucontext(ib_qp->uobject->context) + : NULL; + cxio_destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + + PDBG("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__, + ib_qp, qhp->wq.qpid, qhp); + kfree(qhp); + return 0; +} + +static struct ib_qp *iwch_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) +{ + struct iwch_dev *rhp; + struct iwch_qp *qhp; + struct iwch_pd *php; + struct iwch_cq *schp; + struct iwch_cq *rchp; + struct iwch_create_qp_resp uresp; + int wqsize, sqsize, rqsize; + struct iwch_ucontext *ucontext; + + PDBG("%s ib_pd %p\n", __func__, pd); + if (attrs->qp_type != IB_QPT_RC) + return ERR_PTR(-EINVAL); + php = to_iwch_pd(pd); + rhp = php->rhp; + schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid); + rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid); + if (!schp || !rchp) + return ERR_PTR(-EINVAL); + + /* The RQT size must be # of entries + 1 rounded up to a power of two */ + rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr); + if (rqsize == attrs->cap.max_recv_wr) + rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1); + + /* T3 doesn't support RQT depth < 16 */ + if (rqsize < 16) + rqsize = 16; + + if (rqsize > T3_MAX_RQ_SIZE) + return ERR_PTR(-EINVAL); + + if (attrs->cap.max_inline_data > T3_MAX_INLINE) + return ERR_PTR(-EINVAL); + + /* + * NOTE: The SQ and total WQ sizes don't need to be + * a power of two. However, all the code assumes + * they are. EG: Q_FREECNT() and friends. + */ + sqsize = roundup_pow_of_two(attrs->cap.max_send_wr); + wqsize = roundup_pow_of_two(rqsize + sqsize); + + /* + * Kernel users need more wq space for fastreg WRs which can take + * 2 WR fragments. + */ + ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL; + if (!ucontext && wqsize < (rqsize + (2 * sqsize))) + wqsize = roundup_pow_of_two(rqsize + + roundup_pow_of_two(attrs->cap.max_send_wr * 2)); + PDBG("%s wqsize %d sqsize %d rqsize %d\n", __func__, + wqsize, sqsize, rqsize); + qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); + if (!qhp) + return ERR_PTR(-ENOMEM); + qhp->wq.size_log2 = ilog2(wqsize); + qhp->wq.rq_size_log2 = ilog2(rqsize); + qhp->wq.sq_size_log2 = ilog2(sqsize); + if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) { + kfree(qhp); + return ERR_PTR(-ENOMEM); + } + + attrs->cap.max_recv_wr = rqsize - 1; + attrs->cap.max_send_wr = sqsize; + attrs->cap.max_inline_data = T3_MAX_INLINE; + + qhp->rhp = rhp; + qhp->attr.pd = php->pdid; + qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid; + qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid; + qhp->attr.sq_num_entries = attrs->cap.max_send_wr; + qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; + qhp->attr.sq_max_sges = attrs->cap.max_send_sge; + qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; + qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; + qhp->attr.state = IWCH_QP_STATE_IDLE; + qhp->attr.next_state = IWCH_QP_STATE_IDLE; + + /* + * XXX - These don't get passed in from the openib user + * at create time. The CM sets them via a QP modify. + * Need to fix... I think the CM should + */ + qhp->attr.enable_rdma_read = 1; + qhp->attr.enable_rdma_write = 1; + qhp->attr.enable_bind = 1; + qhp->attr.max_ord = 1; + qhp->attr.max_ird = 1; + + spin_lock_init(&qhp->lock); + init_waitqueue_head(&qhp->wait); + atomic_set(&qhp->refcnt, 1); + + if (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid)) { + cxio_destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + kfree(qhp); + return ERR_PTR(-ENOMEM); + } + + if (udata) { + + struct iwch_mm_entry *mm1, *mm2; + + mm1 = kmalloc(sizeof *mm1, GFP_KERNEL); + if (!mm1) { + iwch_destroy_qp(&qhp->ibqp); + return ERR_PTR(-ENOMEM); + } + + mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); + if (!mm2) { + kfree(mm1); + iwch_destroy_qp(&qhp->ibqp); + return ERR_PTR(-ENOMEM); + } + + uresp.qpid = qhp->wq.qpid; + uresp.size_log2 = qhp->wq.size_log2; + uresp.sq_size_log2 = qhp->wq.sq_size_log2; + uresp.rq_size_log2 = qhp->wq.rq_size_log2; + spin_lock(&ucontext->mmap_lock); + uresp.key = ucontext->key; + ucontext->key += PAGE_SIZE; + uresp.db_key = ucontext->key; + ucontext->key += PAGE_SIZE; + spin_unlock(&ucontext->mmap_lock); + if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { + kfree(mm1); + kfree(mm2); + iwch_destroy_qp(&qhp->ibqp); + return ERR_PTR(-EFAULT); + } + mm1->key = uresp.key; + mm1->addr = virt_to_phys(qhp->wq.queue); + mm1->len = PAGE_ALIGN(wqsize * sizeof (union t3_wr)); + insert_mmap(ucontext, mm1); + mm2->key = uresp.db_key; + mm2->addr = qhp->wq.udb & PAGE_MASK; + mm2->len = PAGE_SIZE; + insert_mmap(ucontext, mm2); + } + qhp->ibqp.qp_num = qhp->wq.qpid; + init_timer(&(qhp->timer)); + PDBG("%s sq_num_entries %d, rq_num_entries %d " + "qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n", + __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, + qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr, + 1 << qhp->wq.size_log2, qhp->wq.rq_addr); + return &qhp->ibqp; +} + +static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct iwch_dev *rhp; + struct iwch_qp *qhp; + enum iwch_qp_attr_mask mask = 0; + struct iwch_qp_attributes attrs; + + PDBG("%s ib_qp %p\n", __func__, ibqp); + + /* iwarp does not support the RTR state */ + if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) + attr_mask &= ~IB_QP_STATE; + + /* Make sure we still have something left to do */ + if (!attr_mask) + return 0; + + memset(&attrs, 0, sizeof attrs); + qhp = to_iwch_qp(ibqp); + rhp = qhp->rhp; + + attrs.next_state = iwch_convert_state(attr->qp_state); + attrs.enable_rdma_read = (attr->qp_access_flags & + IB_ACCESS_REMOTE_READ) ? 1 : 0; + attrs.enable_rdma_write = (attr->qp_access_flags & + IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0; + + + mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0; + mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ? + (IWCH_QP_ATTR_ENABLE_RDMA_READ | + IWCH_QP_ATTR_ENABLE_RDMA_WRITE | + IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0; + + return iwch_modify_qp(rhp, qhp, mask, &attrs, 0); +} + +void iwch_qp_add_ref(struct ib_qp *qp) +{ + PDBG("%s ib_qp %p\n", __func__, qp); + atomic_inc(&(to_iwch_qp(qp)->refcnt)); +} + +void iwch_qp_rem_ref(struct ib_qp *qp) +{ + PDBG("%s ib_qp %p\n", __func__, qp); + if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt))) + wake_up(&(to_iwch_qp(qp)->wait)); +} + +static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn) +{ + PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn); + return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn); +} + + +static int iwch_query_pkey(struct ib_device *ibdev, + u8 port, u16 index, u16 * pkey) +{ + PDBG("%s ibdev %p\n", __func__, ibdev); + *pkey = 0; + return 0; +} + +static int iwch_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + struct iwch_dev *dev; + + PDBG("%s ibdev %p, port %d, index %d, gid %p\n", + __func__, ibdev, port, index, gid); + dev = to_iwch_dev(ibdev); + BUG_ON(port == 0 || port > 2); + memset(&(gid->raw[0]), 0, sizeof(gid->raw)); + memcpy(&(gid->raw[0]), dev->rdev.port_info.lldevs[port-1]->dev_addr, 6); + return 0; +} + +static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev) +{ + struct ethtool_drvinfo info; + struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; + char *cp, *next; + unsigned fw_maj, fw_min, fw_mic; + + lldev->ethtool_ops->get_drvinfo(lldev, &info); + + next = info.fw_version + 1; + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_maj); + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_min); + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_mic); + + return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) | + (fw_mic & 0xffff); +} + +static int iwch_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + + struct iwch_dev *dev; + PDBG("%s ibdev %p\n", __func__, ibdev); + + dev = to_iwch_dev(ibdev); + memset(props, 0, sizeof *props); + memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); + props->hw_ver = dev->rdev.t3cdev_p->type; + props->fw_ver = fw_vers_string_to_u64(dev); + props->device_cap_flags = dev->device_cap_flags; + props->page_size_cap = dev->attr.mem_pgsizes_bitmask; + props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor; + props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device; + props->max_mr_size = dev->attr.max_mr_size; + props->max_qp = dev->attr.max_qps; + props->max_qp_wr = dev->attr.max_wrs; + props->max_sge = dev->attr.max_sge_per_wr; + props->max_sge_rd = 1; + props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; + props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; + props->max_cq = dev->attr.max_cqs; + props->max_cqe = dev->attr.max_cqes_per_cq; + props->max_mr = dev->attr.max_mem_regs; + props->max_pd = dev->attr.max_pds; + props->local_ca_ack_delay = 0; + props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH; + + return 0; +} + +static int iwch_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + struct iwch_dev *dev; + struct net_device *netdev; + struct in_device *inetdev; + + PDBG("%s ibdev %p\n", __func__, ibdev); + + dev = to_iwch_dev(ibdev); + netdev = dev->rdev.port_info.lldevs[port-1]; + + memset(props, 0, sizeof(struct ib_port_attr)); + props->max_mtu = IB_MTU_4096; + if (netdev->mtu >= 4096) + props->active_mtu = IB_MTU_4096; + else if (netdev->mtu >= 2048) + props->active_mtu = IB_MTU_2048; + else if (netdev->mtu >= 1024) + props->active_mtu = IB_MTU_1024; + else if (netdev->mtu >= 512) + props->active_mtu = IB_MTU_512; + else + props->active_mtu = IB_MTU_256; + + if (!netif_carrier_ok(netdev)) + props->state = IB_PORT_DOWN; + else { + inetdev = in_dev_get(netdev); + if (inetdev) { + if (inetdev->ifa_list) + props->state = IB_PORT_ACTIVE; + else + props->state = IB_PORT_INIT; + in_dev_put(inetdev); + } else + props->state = IB_PORT_INIT; + } + + props->port_cap_flags = + IB_PORT_CM_SUP | + IB_PORT_SNMP_TUNNEL_SUP | + IB_PORT_REINIT_SUP | + IB_PORT_DEVICE_MGMT_SUP | + IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; + props->gid_tbl_len = 1; + props->pkey_tbl_len = 1; + props->active_width = 2; + props->active_speed = IB_SPEED_DDR; + props->max_msg_sz = -1; + + return 0; +} + +static ssize_t show_rev(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, + ibdev.dev); + PDBG("%s dev 0x%p\n", __func__, dev); + return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); +} + +static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, + ibdev.dev); + struct ethtool_drvinfo info; + struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; + + PDBG("%s dev 0x%p\n", __func__, dev); + lldev->ethtool_ops->get_drvinfo(lldev, &info); + return sprintf(buf, "%s\n", info.fw_version); +} + +static ssize_t show_hca(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, + ibdev.dev); + struct ethtool_drvinfo info; + struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; + + PDBG("%s dev 0x%p\n", __func__, dev); + lldev->ethtool_ops->get_drvinfo(lldev, &info); + return sprintf(buf, "%s\n", info.driver); +} + +static ssize_t show_board(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, + ibdev.dev); + PDBG("%s dev 0x%p\n", __func__, dev); + return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor, + iwch_dev->rdev.rnic_info.pdev->device); +} + +static int iwch_get_mib(struct ib_device *ibdev, + union rdma_protocol_stats *stats) +{ + struct iwch_dev *dev; + struct tp_mib_stats m; + int ret; + + PDBG("%s ibdev %p\n", __func__, ibdev); + dev = to_iwch_dev(ibdev); + ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m); + if (ret) + return -ENOSYS; + + memset(stats, 0, sizeof *stats); + stats->iw.ipInReceives = ((u64) m.ipInReceive_hi << 32) + + m.ipInReceive_lo; + stats->iw.ipInHdrErrors = ((u64) m.ipInHdrErrors_hi << 32) + + m.ipInHdrErrors_lo; + stats->iw.ipInAddrErrors = ((u64) m.ipInAddrErrors_hi << 32) + + m.ipInAddrErrors_lo; + stats->iw.ipInUnknownProtos = ((u64) m.ipInUnknownProtos_hi << 32) + + m.ipInUnknownProtos_lo; + stats->iw.ipInDiscards = ((u64) m.ipInDiscards_hi << 32) + + m.ipInDiscards_lo; + stats->iw.ipInDelivers = ((u64) m.ipInDelivers_hi << 32) + + m.ipInDelivers_lo; + stats->iw.ipOutRequests = ((u64) m.ipOutRequests_hi << 32) + + m.ipOutRequests_lo; + stats->iw.ipOutDiscards = ((u64) m.ipOutDiscards_hi << 32) + + m.ipOutDiscards_lo; + stats->iw.ipOutNoRoutes = ((u64) m.ipOutNoRoutes_hi << 32) + + m.ipOutNoRoutes_lo; + stats->iw.ipReasmTimeout = (u64) m.ipReasmTimeout; + stats->iw.ipReasmReqds = (u64) m.ipReasmReqds; + stats->iw.ipReasmOKs = (u64) m.ipReasmOKs; + stats->iw.ipReasmFails = (u64) m.ipReasmFails; + stats->iw.tcpActiveOpens = (u64) m.tcpActiveOpens; + stats->iw.tcpPassiveOpens = (u64) m.tcpPassiveOpens; + stats->iw.tcpAttemptFails = (u64) m.tcpAttemptFails; + stats->iw.tcpEstabResets = (u64) m.tcpEstabResets; + stats->iw.tcpOutRsts = (u64) m.tcpOutRsts; + stats->iw.tcpCurrEstab = (u64) m.tcpCurrEstab; + stats->iw.tcpInSegs = ((u64) m.tcpInSegs_hi << 32) + + m.tcpInSegs_lo; + stats->iw.tcpOutSegs = ((u64) m.tcpOutSegs_hi << 32) + + m.tcpOutSegs_lo; + stats->iw.tcpRetransSegs = ((u64) m.tcpRetransSeg_hi << 32) + + m.tcpRetransSeg_lo; + stats->iw.tcpInErrs = ((u64) m.tcpInErrs_hi << 32) + + m.tcpInErrs_lo; + stats->iw.tcpRtoMin = (u64) m.tcpRtoMin; + stats->iw.tcpRtoMax = (u64) m.tcpRtoMax; + return 0; +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); + +static struct device_attribute *iwch_class_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_fw_ver, + &dev_attr_hca_type, + &dev_attr_board_id, +}; + +int iwch_register_device(struct iwch_dev *dev) +{ + int ret; + int i; + + PDBG("%s iwch_dev %p\n", __func__, dev); + strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX); + memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); + memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); + dev->ibdev.owner = THIS_MODULE; + dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | + IB_DEVICE_MEM_WINDOW | + IB_DEVICE_MEM_MGT_EXTENSIONS; + + /* cxgb3 supports STag 0. */ + dev->ibdev.local_dma_lkey = 0; + + dev->ibdev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV); + dev->ibdev.node_type = RDMA_NODE_RNIC; + memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); + dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; + dev->ibdev.num_comp_vectors = 1; + dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev); + dev->ibdev.query_device = iwch_query_device; + dev->ibdev.query_port = iwch_query_port; + dev->ibdev.query_pkey = iwch_query_pkey; + dev->ibdev.query_gid = iwch_query_gid; + dev->ibdev.alloc_ucontext = iwch_alloc_ucontext; + dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext; + dev->ibdev.mmap = iwch_mmap; + dev->ibdev.alloc_pd = iwch_allocate_pd; + dev->ibdev.dealloc_pd = iwch_deallocate_pd; + dev->ibdev.create_ah = iwch_ah_create; + dev->ibdev.destroy_ah = iwch_ah_destroy; + dev->ibdev.create_qp = iwch_create_qp; + dev->ibdev.modify_qp = iwch_ib_modify_qp; + dev->ibdev.destroy_qp = iwch_destroy_qp; + dev->ibdev.create_cq = iwch_create_cq; + dev->ibdev.destroy_cq = iwch_destroy_cq; + dev->ibdev.resize_cq = iwch_resize_cq; + dev->ibdev.poll_cq = iwch_poll_cq; + dev->ibdev.get_dma_mr = iwch_get_dma_mr; + dev->ibdev.reg_phys_mr = iwch_register_phys_mem; + dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem; + dev->ibdev.reg_user_mr = iwch_reg_user_mr; + dev->ibdev.dereg_mr = iwch_dereg_mr; + dev->ibdev.alloc_mw = iwch_alloc_mw; + dev->ibdev.bind_mw = iwch_bind_mw; + dev->ibdev.dealloc_mw = iwch_dealloc_mw; + dev->ibdev.alloc_fast_reg_mr = iwch_alloc_fast_reg_mr; + dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl; + dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl; + dev->ibdev.attach_mcast = iwch_multicast_attach; + dev->ibdev.detach_mcast = iwch_multicast_detach; + dev->ibdev.process_mad = iwch_process_mad; + dev->ibdev.req_notify_cq = iwch_arm_cq; + dev->ibdev.post_send = iwch_post_send; + dev->ibdev.post_recv = iwch_post_receive; + dev->ibdev.get_protocol_stats = iwch_get_mib; + dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; + + dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); + if (!dev->ibdev.iwcm) + return -ENOMEM; + + dev->ibdev.iwcm->connect = iwch_connect; + dev->ibdev.iwcm->accept = iwch_accept_cr; + dev->ibdev.iwcm->reject = iwch_reject_cr; + dev->ibdev.iwcm->create_listen = iwch_create_listen; + dev->ibdev.iwcm->destroy_listen = iwch_destroy_listen; + dev->ibdev.iwcm->add_ref = iwch_qp_add_ref; + dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref; + dev->ibdev.iwcm->get_qp = iwch_get_qp; + + ret = ib_register_device(&dev->ibdev, NULL); + if (ret) + goto bail1; + + for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) { + ret = device_create_file(&dev->ibdev.dev, + iwch_class_attributes[i]); + if (ret) { + goto bail2; + } + } + return 0; +bail2: + ib_unregister_device(&dev->ibdev); +bail1: + kfree(dev->ibdev.iwcm); + return ret; +} + +void iwch_unregister_device(struct iwch_dev *dev) +{ + int i; + + PDBG("%s iwch_dev %p\n", __func__, dev); + for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) + device_remove_file(&dev->ibdev.dev, + iwch_class_attributes[i]); + ib_unregister_device(&dev->ibdev); + kfree(dev->ibdev.iwcm); + return; +} diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h new file mode 100644 index 00000000..87c14b0c --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -0,0 +1,360 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __IWCH_PROVIDER_H__ +#define __IWCH_PROVIDER_H__ + +#include +#include +#include +#include +#include "t3cdev.h" +#include "iwch.h" +#include "cxio_wr.h" +#include "cxio_hal.h" + +struct iwch_pd { + struct ib_pd ibpd; + u32 pdid; + struct iwch_dev *rhp; +}; + +static inline struct iwch_pd *to_iwch_pd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct iwch_pd, ibpd); +} + +struct tpt_attributes { + u32 stag; + u32 state:1; + u32 type:2; + u32 rsvd:1; + enum tpt_mem_perm perms; + u32 remote_invaliate_disable:1; + u32 zbva:1; + u32 mw_bind_enable:1; + u32 page_size:5; + + u32 pdid; + u32 qpid; + u32 pbl_addr; + u32 len; + u64 va_fbo; + u32 pbl_size; +}; + +struct iwch_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + struct iwch_dev *rhp; + u64 kva; + struct tpt_attributes attr; +}; + +typedef struct iwch_mw iwch_mw_handle; + +static inline struct iwch_mr *to_iwch_mr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct iwch_mr, ibmr); +} + +struct iwch_mw { + struct ib_mw ibmw; + struct iwch_dev *rhp; + u64 kva; + struct tpt_attributes attr; +}; + +static inline struct iwch_mw *to_iwch_mw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct iwch_mw, ibmw); +} + +struct iwch_cq { + struct ib_cq ibcq; + struct iwch_dev *rhp; + struct t3_cq cq; + spinlock_t lock; + spinlock_t comp_handler_lock; + atomic_t refcnt; + wait_queue_head_t wait; + u32 __user *user_rptr_addr; +}; + +static inline struct iwch_cq *to_iwch_cq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct iwch_cq, ibcq); +} + +enum IWCH_QP_FLAGS { + QP_QUIESCED = 0x01 +}; + +struct iwch_mpa_attributes { + u8 initiator; + u8 recv_marker_enabled; + u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */ + u8 crc_enabled; + u8 version; /* 0 or 1 */ +}; + +struct iwch_qp_attributes { + u32 scq; + u32 rcq; + u32 sq_num_entries; + u32 rq_num_entries; + u32 sq_max_sges; + u32 sq_max_sges_rdma_write; + u32 rq_max_sges; + u32 state; + u8 enable_rdma_read; + u8 enable_rdma_write; /* enable inbound Read Resp. */ + u8 enable_bind; + u8 enable_mmid0_fastreg; /* Enable STAG0 + Fast-register */ + /* + * Next QP state. If specify the current state, only the + * QP attributes will be modified. + */ + u32 max_ord; + u32 max_ird; + u32 pd; /* IN */ + u32 next_state; + char terminate_buffer[52]; + u32 terminate_msg_len; + u8 is_terminate_local; + struct iwch_mpa_attributes mpa_attr; /* IN-OUT */ + struct iwch_ep *llp_stream_handle; + char *stream_msg_buf; /* Last stream msg. before Idle -> RTS */ + u32 stream_msg_buf_len; /* Only on Idle -> RTS */ +}; + +struct iwch_qp { + struct ib_qp ibqp; + struct iwch_dev *rhp; + struct iwch_ep *ep; + struct iwch_qp_attributes attr; + struct t3_wq wq; + spinlock_t lock; + atomic_t refcnt; + wait_queue_head_t wait; + enum IWCH_QP_FLAGS flags; + struct timer_list timer; +}; + +static inline int qp_quiesced(struct iwch_qp *qhp) +{ + return qhp->flags & QP_QUIESCED; +} + +static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct iwch_qp, ibqp); +} + +void iwch_qp_add_ref(struct ib_qp *qp); +void iwch_qp_rem_ref(struct ib_qp *qp); + +struct iwch_ucontext { + struct ib_ucontext ibucontext; + struct cxio_ucontext uctx; + u32 key; + spinlock_t mmap_lock; + struct list_head mmaps; +}; + +static inline struct iwch_ucontext *to_iwch_ucontext(struct ib_ucontext *c) +{ + return container_of(c, struct iwch_ucontext, ibucontext); +} + +struct iwch_mm_entry { + struct list_head entry; + u64 addr; + u32 key; + unsigned len; +}; + +static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext, + u32 key, unsigned len) +{ + struct list_head *pos, *nxt; + struct iwch_mm_entry *mm; + + spin_lock(&ucontext->mmap_lock); + list_for_each_safe(pos, nxt, &ucontext->mmaps) { + + mm = list_entry(pos, struct iwch_mm_entry, entry); + if (mm->key == key && mm->len == len) { + list_del_init(&mm->entry); + spin_unlock(&ucontext->mmap_lock); + PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__, + key, (unsigned long long) mm->addr, mm->len); + return mm; + } + } + spin_unlock(&ucontext->mmap_lock); + return NULL; +} + +static inline void insert_mmap(struct iwch_ucontext *ucontext, + struct iwch_mm_entry *mm) +{ + spin_lock(&ucontext->mmap_lock); + PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__, + mm->key, (unsigned long long) mm->addr, mm->len); + list_add_tail(&mm->entry, &ucontext->mmaps); + spin_unlock(&ucontext->mmap_lock); +} + +enum iwch_qp_attr_mask { + IWCH_QP_ATTR_NEXT_STATE = 1 << 0, + IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7, + IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8, + IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9, + IWCH_QP_ATTR_MAX_ORD = 1 << 11, + IWCH_QP_ATTR_MAX_IRD = 1 << 12, + IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22, + IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23, + IWCH_QP_ATTR_MPA_ATTR = 1 << 24, + IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25, + IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ | + IWCH_QP_ATTR_ENABLE_RDMA_WRITE | + IWCH_QP_ATTR_MAX_ORD | + IWCH_QP_ATTR_MAX_IRD | + IWCH_QP_ATTR_LLP_STREAM_HANDLE | + IWCH_QP_ATTR_STREAM_MSG_BUFFER | + IWCH_QP_ATTR_MPA_ATTR | + IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE) +}; + +int iwch_modify_qp(struct iwch_dev *rhp, + struct iwch_qp *qhp, + enum iwch_qp_attr_mask mask, + struct iwch_qp_attributes *attrs, + int internal); + +enum iwch_qp_state { + IWCH_QP_STATE_IDLE, + IWCH_QP_STATE_RTS, + IWCH_QP_STATE_ERROR, + IWCH_QP_STATE_TERMINATE, + IWCH_QP_STATE_CLOSING, + IWCH_QP_STATE_TOT +}; + +static inline int iwch_convert_state(enum ib_qp_state ib_state) +{ + switch (ib_state) { + case IB_QPS_RESET: + case IB_QPS_INIT: + return IWCH_QP_STATE_IDLE; + case IB_QPS_RTS: + return IWCH_QP_STATE_RTS; + case IB_QPS_SQD: + return IWCH_QP_STATE_CLOSING; + case IB_QPS_SQE: + return IWCH_QP_STATE_TERMINATE; + case IB_QPS_ERR: + return IWCH_QP_STATE_ERROR; + default: + return -1; + } +} + +static inline u32 iwch_ib_to_tpt_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) | + (acc & IB_ACCESS_MW_BIND ? TPT_MW_BIND : 0) | + TPT_LOCAL_READ; +} + +static inline u32 iwch_ib_to_tpt_bind_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0); +} + +enum iwch_mmid_state { + IWCH_STAG_STATE_VALID, + IWCH_STAG_STATE_INVALID +}; + +enum iwch_qp_query_flags { + IWCH_QP_QUERY_CONTEXT_NONE = 0x0, /* No ctx; Only attrs */ + IWCH_QP_QUERY_CONTEXT_GET = 0x1, /* Get ctx + attrs */ + IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2, /* Not Supported */ + + /* + * Quiesce QP context; Consumer + * will NOT replay outstanding WR + */ + IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4, + IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8, + IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */ +}; + +u16 iwch_rqes_posted(struct iwch_qp *qhp); +int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +int iwch_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind); +int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); +int iwch_post_zb_read(struct iwch_ep *ep); +int iwch_register_device(struct iwch_dev *dev); +void iwch_unregister_device(struct iwch_dev *dev); +void stop_read_rep_timer(struct iwch_qp *qhp); +int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, + struct iwch_mr *mhp, int shift); +int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, + struct iwch_mr *mhp, + int shift, + int npages); +int iwch_alloc_pbl(struct iwch_mr *mhp, int npages); +void iwch_free_pbl(struct iwch_mr *mhp); +int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset); +int build_phys_page_list(struct ib_phys_buf *buffer_list, + int num_phys_buf, + u64 *iova_start, + u64 *total_size, + int *npages, + int *shift, + __be64 **page_list); + + +#define IWCH_NODE_DESC "cxgb3 Chelsio Communications" + +#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c new file mode 100644 index 00000000..6de8463f --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -0,0 +1,1161 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include "iwch_provider.h" +#include "iwch.h" +#include "iwch_cm.h" +#include "cxio_hal.h" +#include "cxio_resource.h" + +#define NO_SUPPORT -1 + +static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, + u8 * flit_cnt) +{ + int i; + u32 plen; + + switch (wr->opcode) { + case IB_WR_SEND: + if (wr->send_flags & IB_SEND_SOLICITED) + wqe->send.rdmaop = T3_SEND_WITH_SE; + else + wqe->send.rdmaop = T3_SEND; + wqe->send.rem_stag = 0; + break; + case IB_WR_SEND_WITH_INV: + if (wr->send_flags & IB_SEND_SOLICITED) + wqe->send.rdmaop = T3_SEND_WITH_SE_INV; + else + wqe->send.rdmaop = T3_SEND_WITH_INV; + wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey); + break; + default: + return -EINVAL; + } + if (wr->num_sge > T3_MAX_SGE) + return -EINVAL; + wqe->send.reserved[0] = 0; + wqe->send.reserved[1] = 0; + wqe->send.reserved[2] = 0; + plen = 0; + for (i = 0; i < wr->num_sge; i++) { + if ((plen + wr->sg_list[i].length) < plen) + return -EMSGSIZE; + + plen += wr->sg_list[i].length; + wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); + wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); + wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); + } + wqe->send.num_sgle = cpu_to_be32(wr->num_sge); + *flit_cnt = 4 + ((wr->num_sge) << 1); + wqe->send.plen = cpu_to_be32(plen); + return 0; +} + +static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt) +{ + int i; + u32 plen; + if (wr->num_sge > T3_MAX_SGE) + return -EINVAL; + wqe->write.rdmaop = T3_RDMA_WRITE; + wqe->write.reserved[0] = 0; + wqe->write.reserved[1] = 0; + wqe->write.reserved[2] = 0; + wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); + wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); + + if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { + plen = 4; + wqe->write.sgl[0].stag = wr->ex.imm_data; + wqe->write.sgl[0].len = cpu_to_be32(0); + wqe->write.num_sgle = cpu_to_be32(0); + *flit_cnt = 6; + } else { + plen = 0; + for (i = 0; i < wr->num_sge; i++) { + if ((plen + wr->sg_list[i].length) < plen) { + return -EMSGSIZE; + } + plen += wr->sg_list[i].length; + wqe->write.sgl[i].stag = + cpu_to_be32(wr->sg_list[i].lkey); + wqe->write.sgl[i].len = + cpu_to_be32(wr->sg_list[i].length); + wqe->write.sgl[i].to = + cpu_to_be64(wr->sg_list[i].addr); + } + wqe->write.num_sgle = cpu_to_be32(wr->num_sge); + *flit_cnt = 5 + ((wr->num_sge) << 1); + } + wqe->write.plen = cpu_to_be32(plen); + return 0; +} + +static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt) +{ + if (wr->num_sge > 1) + return -EINVAL; + wqe->read.rdmaop = T3_READ_REQ; + if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) + wqe->read.local_inv = 1; + else + wqe->read.local_inv = 0; + wqe->read.reserved[0] = 0; + wqe->read.reserved[1] = 0; + wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); + wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr); + wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey); + wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length); + wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr); + *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; + return 0; +} + +static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) +{ + int i; + __be64 *p; + + if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH) + return -EINVAL; + *wr_cnt = 1; + wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey); + wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length); + wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); + wqe->fastreg.va_base_lo_fbo = + cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); + wqe->fastreg.page_type_perms = cpu_to_be32( + V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) | + V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) | + V_FR_TYPE(TPT_VATO) | + V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags))); + p = &wqe->fastreg.pbl_addrs[0]; + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) { + + /* If we need a 2nd WR, then set it up */ + if (i == T3_MAX_FASTREG_FRAG) { + *wr_cnt = 2; + wqe = (union t3_wr *)(wq->queue + + Q_PTR2IDX((wq->wptr+1), wq->size_log2)); + build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, + Q_GENBIT(wq->wptr + 1, wq->size_log2), + 0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG, + T3_EOP); + + p = &wqe->pbl_frag.pbl_addrs[0]; + } + *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); + } + *flit_cnt = 5 + wr->wr.fast_reg.page_list_len; + if (*flit_cnt > 15) + *flit_cnt = 15; + return 0; +} + +static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt) +{ + wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); + wqe->local_inv.reserved = 0; + *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3; + return 0; +} + +static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, + u32 num_sgle, u32 * pbl_addr, u8 * page_size) +{ + int i; + struct iwch_mr *mhp; + u64 offset; + for (i = 0; i < num_sgle; i++) { + + mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8); + if (!mhp) { + PDBG("%s %d\n", __func__, __LINE__); + return -EIO; + } + if (!mhp->attr.state) { + PDBG("%s %d\n", __func__, __LINE__); + return -EIO; + } + if (mhp->attr.zbva) { + PDBG("%s %d\n", __func__, __LINE__); + return -EIO; + } + + if (sg_list[i].addr < mhp->attr.va_fbo) { + PDBG("%s %d\n", __func__, __LINE__); + return -EINVAL; + } + if (sg_list[i].addr + ((u64) sg_list[i].length) < + sg_list[i].addr) { + PDBG("%s %d\n", __func__, __LINE__); + return -EINVAL; + } + if (sg_list[i].addr + ((u64) sg_list[i].length) > + mhp->attr.va_fbo + ((u64) mhp->attr.len)) { + PDBG("%s %d\n", __func__, __LINE__); + return -EINVAL; + } + offset = sg_list[i].addr - mhp->attr.va_fbo; + offset += mhp->attr.va_fbo & + ((1UL << (12 + mhp->attr.page_size)) - 1); + pbl_addr[i] = ((mhp->attr.pbl_addr - + rhp->rdev.rnic_info.pbl_base) >> 3) + + (offset >> (12 + mhp->attr.page_size)); + page_size[i] = mhp->attr.page_size; + } + return 0; +} + +static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, + struct ib_recv_wr *wr) +{ + int i, err = 0; + u32 pbl_addr[T3_MAX_SGE]; + u8 page_size[T3_MAX_SGE]; + + err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr, + page_size); + if (err) + return err; + wqe->recv.pagesz[0] = page_size[0]; + wqe->recv.pagesz[1] = page_size[1]; + wqe->recv.pagesz[2] = page_size[2]; + wqe->recv.pagesz[3] = page_size[3]; + wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); + for (i = 0; i < wr->num_sge; i++) { + wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); + wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); + + /* to in the WQE == the offset into the page */ + wqe->recv.sgl[i].to = cpu_to_be64(((u32)wr->sg_list[i].addr) & + ((1UL << (12 + page_size[i])) - 1)); + + /* pbl_addr is the adapters address in the PBL */ + wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); + } + for (; i < T3_MAX_SGE; i++) { + wqe->recv.sgl[i].stag = 0; + wqe->recv.sgl[i].len = 0; + wqe->recv.sgl[i].to = 0; + wqe->recv.pbl_addr[i] = 0; + } + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].wr_id = wr->wr_id; + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].pbl_addr = 0; + return 0; +} + +static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, + struct ib_recv_wr *wr) +{ + int i; + u32 pbl_addr; + u32 pbl_offset; + + + /* + * The T3 HW requires the PBL in the HW recv descriptor to reference + * a PBL entry. So we allocate the max needed PBL memory here and pass + * it to the uP in the recv WR. The uP will build the PBL and setup + * the HW recv descriptor. + */ + pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE); + if (!pbl_addr) + return -ENOMEM; + + /* + * Compute the 8B aligned offset. + */ + pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3; + + wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); + + for (i = 0; i < wr->num_sge; i++) { + + /* + * Use a 128MB page size. This and an imposed 128MB + * sge length limit allows us to require only a 2-entry HW + * PBL for each SGE. This restriction is acceptable since + * since it is not possible to allocate 128MB of contiguous + * DMA coherent memory! + */ + if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN) + return -EINVAL; + wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT; + + /* + * T3 restricts a recv to all zero-stag or all non-zero-stag. + */ + if (wr->sg_list[i].lkey != 0) + return -EINVAL; + wqe->recv.sgl[i].stag = 0; + wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); + wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); + wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset); + pbl_offset += 2; + } + for (; i < T3_MAX_SGE; i++) { + wqe->recv.pagesz[i] = 0; + wqe->recv.sgl[i].stag = 0; + wqe->recv.sgl[i].len = 0; + wqe->recv.sgl[i].to = 0; + wqe->recv.pbl_addr[i] = 0; + } + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].wr_id = wr->wr_id; + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].pbl_addr = pbl_addr; + return 0; +} + +int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + int err = 0; + u8 uninitialized_var(t3_wr_flit_cnt); + enum t3_wr_opcode t3_wr_opcode = 0; + enum t3_wr_flags t3_wr_flags; + struct iwch_qp *qhp; + u32 idx; + union t3_wr *wqe; + u32 num_wrs; + unsigned long flag; + struct t3_swsq *sqp; + int wr_cnt = 1; + + qhp = to_iwch_qp(ibqp); + spin_lock_irqsave(&qhp->lock, flag); + if (qhp->attr.state > IWCH_QP_STATE_RTS) { + spin_unlock_irqrestore(&qhp->lock, flag); + err = -EINVAL; + goto out; + } + num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, + qhp->wq.sq_size_log2); + if (num_wrs == 0) { + spin_unlock_irqrestore(&qhp->lock, flag); + err = -ENOMEM; + goto out; + } + while (wr) { + if (num_wrs == 0) { + err = -ENOMEM; + break; + } + idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); + wqe = (union t3_wr *) (qhp->wq.queue + idx); + t3_wr_flags = 0; + if (wr->send_flags & IB_SEND_SOLICITED) + t3_wr_flags |= T3_SOLICITED_EVENT_FLAG; + if (wr->send_flags & IB_SEND_SIGNALED) + t3_wr_flags |= T3_COMPLETION_FLAG; + sqp = qhp->wq.sq + + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); + switch (wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_INV: + if (wr->send_flags & IB_SEND_FENCE) + t3_wr_flags |= T3_READ_FENCE_FLAG; + t3_wr_opcode = T3_WR_SEND; + err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt); + break; + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + t3_wr_opcode = T3_WR_WRITE; + err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt); + break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: + t3_wr_opcode = T3_WR_READ; + t3_wr_flags = 0; /* T3 reads are always signaled */ + err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt); + if (err) + break; + sqp->read_len = wqe->read.local_len; + if (!qhp->wq.oldest_read) + qhp->wq.oldest_read = sqp; + break; + case IB_WR_FAST_REG_MR: + t3_wr_opcode = T3_WR_FASTREG; + err = build_fastreg(wqe, wr, &t3_wr_flit_cnt, + &wr_cnt, &qhp->wq); + break; + case IB_WR_LOCAL_INV: + if (wr->send_flags & IB_SEND_FENCE) + t3_wr_flags |= T3_LOCAL_FENCE_FLAG; + t3_wr_opcode = T3_WR_INV_STAG; + err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt); + break; + default: + PDBG("%s post of type=%d TBD!\n", __func__, + wr->opcode); + err = -EINVAL; + } + if (err) + break; + wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; + sqp->wr_id = wr->wr_id; + sqp->opcode = wr2opcode(t3_wr_opcode); + sqp->sq_wptr = qhp->wq.sq_wptr; + sqp->complete = 0; + sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED); + + build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags, + Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), + 0, t3_wr_flit_cnt, + (wr_cnt == 1) ? T3_SOPEOP : T3_SOP); + PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n", + __func__, (unsigned long long) wr->wr_id, idx, + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2), + sqp->opcode); + wr = wr->next; + num_wrs--; + qhp->wq.wptr += wr_cnt; + ++(qhp->wq.sq_wptr); + } + spin_unlock_irqrestore(&qhp->lock, flag); + if (cxio_wq_db_enabled(&qhp->wq)) + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + +out: + if (err) + *bad_wr = wr; + return err; +} + +int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + int err = 0; + struct iwch_qp *qhp; + u32 idx; + union t3_wr *wqe; + u32 num_wrs; + unsigned long flag; + + qhp = to_iwch_qp(ibqp); + spin_lock_irqsave(&qhp->lock, flag); + if (qhp->attr.state > IWCH_QP_STATE_RTS) { + spin_unlock_irqrestore(&qhp->lock, flag); + err = -EINVAL; + goto out; + } + num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr, + qhp->wq.rq_size_log2) - 1; + if (!wr) { + spin_unlock_irqrestore(&qhp->lock, flag); + err = -ENOMEM; + goto out; + } + while (wr) { + if (wr->num_sge > T3_MAX_SGE) { + err = -EINVAL; + break; + } + idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); + wqe = (union t3_wr *) (qhp->wq.queue + idx); + if (num_wrs) + if (wr->sg_list[0].lkey) + err = build_rdma_recv(qhp, wqe, wr); + else + err = build_zero_stag_recv(qhp, wqe, wr); + else + err = -ENOMEM; + + if (err) + break; + + build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, + Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), + 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP); + PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x " + "wqe %p \n", __func__, (unsigned long long) wr->wr_id, + idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe); + ++(qhp->wq.rq_wptr); + ++(qhp->wq.wptr); + wr = wr->next; + num_wrs--; + } + spin_unlock_irqrestore(&qhp->lock, flag); + if (cxio_wq_db_enabled(&qhp->wq)) + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + +out: + if (err) + *bad_wr = wr; + return err; +} + +int iwch_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + struct iwch_dev *rhp; + struct iwch_mw *mhp; + struct iwch_qp *qhp; + union t3_wr *wqe; + u32 pbl_addr; + u8 page_size; + u32 num_wrs; + unsigned long flag; + struct ib_sge sgl; + int err=0; + enum t3_wr_flags t3_wr_flags; + u32 idx; + struct t3_swsq *sqp; + + qhp = to_iwch_qp(qp); + mhp = to_iwch_mw(mw); + rhp = qhp->rhp; + + spin_lock_irqsave(&qhp->lock, flag); + if (qhp->attr.state > IWCH_QP_STATE_RTS) { + spin_unlock_irqrestore(&qhp->lock, flag); + return -EINVAL; + } + num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, + qhp->wq.sq_size_log2); + if (num_wrs == 0) { + spin_unlock_irqrestore(&qhp->lock, flag); + return -ENOMEM; + } + idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); + PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __func__, idx, + mw, mw_bind); + wqe = (union t3_wr *) (qhp->wq.queue + idx); + + t3_wr_flags = 0; + if (mw_bind->send_flags & IB_SEND_SIGNALED) + t3_wr_flags = T3_COMPLETION_FLAG; + + sgl.addr = mw_bind->addr; + sgl.lkey = mw_bind->mr->lkey; + sgl.length = mw_bind->length; + wqe->bind.reserved = 0; + wqe->bind.type = TPT_VATO; + + /* TBD: check perms */ + wqe->bind.perms = iwch_ib_to_tpt_bind_access(mw_bind->mw_access_flags); + wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey); + wqe->bind.mw_stag = cpu_to_be32(mw->rkey); + wqe->bind.mw_len = cpu_to_be32(mw_bind->length); + wqe->bind.mw_va = cpu_to_be64(mw_bind->addr); + err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size); + if (err) { + spin_unlock_irqrestore(&qhp->lock, flag); + return err; + } + wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; + sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); + sqp->wr_id = mw_bind->wr_id; + sqp->opcode = T3_BIND_MW; + sqp->sq_wptr = qhp->wq.sq_wptr; + sqp->complete = 0; + sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED); + wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr); + wqe->bind.mr_pagesz = page_size; + build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags, + Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0, + sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP); + ++(qhp->wq.wptr); + ++(qhp->wq.sq_wptr); + spin_unlock_irqrestore(&qhp->lock, flag); + + if (cxio_wq_db_enabled(&qhp->wq)) + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + + return err; +} + +static inline void build_term_codes(struct respQ_msg_t *rsp_msg, + u8 *layer_type, u8 *ecode) +{ + int status = TPT_ERR_INTERNAL_ERR; + int tagged = 0; + int opcode = -1; + int rqtype = 0; + int send_inv = 0; + + if (rsp_msg) { + status = CQE_STATUS(rsp_msg->cqe); + opcode = CQE_OPCODE(rsp_msg->cqe); + rqtype = RQ_TYPE(rsp_msg->cqe); + send_inv = (opcode == T3_SEND_WITH_INV) || + (opcode == T3_SEND_WITH_SE_INV); + tagged = (opcode == T3_RDMA_WRITE) || + (rqtype && (opcode == T3_READ_RESP)); + } + + switch (status) { + case TPT_ERR_STAG: + if (send_inv) { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_CANT_INV_STAG; + } else { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_INV_STAG; + } + break; + case TPT_ERR_PDID: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + if ((opcode == T3_SEND_WITH_INV) || + (opcode == T3_SEND_WITH_SE_INV)) + *ecode = RDMAP_CANT_INV_STAG; + else + *ecode = RDMAP_STAG_NOT_ASSOC; + break; + case TPT_ERR_QPID: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_STAG_NOT_ASSOC; + break; + case TPT_ERR_ACCESS: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_ACC_VIOL; + break; + case TPT_ERR_WRAP: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_TO_WRAP; + break; + case TPT_ERR_BOUND: + if (tagged) { + *layer_type = LAYER_DDP|DDP_TAGGED_ERR; + *ecode = DDPT_BASE_BOUNDS; + } else { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_BASE_BOUNDS; + } + break; + case TPT_ERR_INVALIDATE_SHARED_MR: + case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_CANT_INV_STAG; + break; + case TPT_ERR_ECC: + case TPT_ERR_ECC_PSTAG: + case TPT_ERR_INTERNAL_ERR: + *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA; + *ecode = 0; + break; + case TPT_ERR_OUT_OF_RQE: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_MSN_NOBUF; + break; + case TPT_ERR_PBL_ADDR_BOUND: + *layer_type = LAYER_DDP|DDP_TAGGED_ERR; + *ecode = DDPT_BASE_BOUNDS; + break; + case TPT_ERR_CRC: + *layer_type = LAYER_MPA|DDP_LLP; + *ecode = MPA_CRC_ERR; + break; + case TPT_ERR_MARKER: + *layer_type = LAYER_MPA|DDP_LLP; + *ecode = MPA_MARKER_ERR; + break; + case TPT_ERR_PDU_LEN_ERR: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_MSG_TOOBIG; + break; + case TPT_ERR_DDP_VERSION: + if (tagged) { + *layer_type = LAYER_DDP|DDP_TAGGED_ERR; + *ecode = DDPT_INV_VERS; + } else { + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_VERS; + } + break; + case TPT_ERR_RDMA_VERSION: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_INV_VERS; + break; + case TPT_ERR_OPCODE: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_INV_OPCODE; + break; + case TPT_ERR_DDP_QUEUE_NUM: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_QN; + break; + case TPT_ERR_MSN: + case TPT_ERR_MSN_GAP: + case TPT_ERR_MSN_RANGE: + case TPT_ERR_IRD_OVERFLOW: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_MSN_RANGE; + break; + case TPT_ERR_TBIT: + *layer_type = LAYER_DDP|DDP_LOCAL_CATA; + *ecode = 0; + break; + case TPT_ERR_MO: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_MO; + break; + default: + *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; + *ecode = 0; + break; + } +} + +int iwch_post_zb_read(struct iwch_ep *ep) +{ + union t3_wr *wqe; + struct sk_buff *skb; + u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; + + PDBG("%s enter\n", __func__); + skb = alloc_skb(40, GFP_KERNEL); + if (!skb) { + printk(KERN_ERR "%s cannot send zb_read!!\n", __func__); + return -ENOMEM; + } + wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr)); + memset(wqe, 0, sizeof(struct t3_rdma_read_wr)); + wqe->read.rdmaop = T3_READ_REQ; + wqe->read.reserved[0] = 0; + wqe->read.reserved[1] = 0; + wqe->read.rem_stag = cpu_to_be32(1); + wqe->read.rem_to = cpu_to_be64(1); + wqe->read.local_stag = cpu_to_be32(1); + wqe->read.local_len = cpu_to_be32(0); + wqe->read.local_to = cpu_to_be64(1); + wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ)); + wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(ep->hwtid)| + V_FW_RIWR_LEN(flit_cnt)); + skb->priority = CPL_PRIORITY_DATA; + return iwch_cxgb3_ofld_send(ep->com.qp->rhp->rdev.t3cdev_p, skb); +} + +/* + * This posts a TERMINATE with layer=RDMA, type=catastrophic. + */ +int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) +{ + union t3_wr *wqe; + struct terminate_message *term; + struct sk_buff *skb; + + PDBG("%s %d\n", __func__, __LINE__); + skb = alloc_skb(40, GFP_ATOMIC); + if (!skb) { + printk(KERN_ERR "%s cannot send TERMINATE!\n", __func__); + return -ENOMEM; + } + wqe = (union t3_wr *)skb_put(skb, 40); + memset(wqe, 0, 40); + wqe->send.rdmaop = T3_TERMINATE; + + /* immediate data length */ + wqe->send.plen = htonl(4); + + /* immediate data starts here. */ + term = (struct terminate_message *)wqe->send.sgl; + build_term_codes(rsp_msg, &term->layer_etype, &term->ecode); + wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) | + V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG)); + wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)); + skb->priority = CPL_PRIORITY_DATA; + return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); +} + +/* + * Assumes qhp lock is held. + */ +static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, + struct iwch_cq *schp) +{ + int count; + int flushed; + + + PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); + /* take a ref on the qhp since we must release the lock */ + atomic_inc(&qhp->refcnt); + spin_unlock(&qhp->lock); + + /* locking hierarchy: cq lock first, then qp lock. */ + spin_lock(&rchp->lock); + spin_lock(&qhp->lock); + cxio_flush_hw_cq(&rchp->cq); + cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); + flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); + spin_unlock(&qhp->lock); + spin_unlock(&rchp->lock); + if (flushed) { + spin_lock(&rchp->comp_handler_lock); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock(&rchp->comp_handler_lock); + } + + /* locking hierarchy: cq lock first, then qp lock. */ + spin_lock(&schp->lock); + spin_lock(&qhp->lock); + cxio_flush_hw_cq(&schp->cq); + cxio_count_scqes(&schp->cq, &qhp->wq, &count); + flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); + spin_unlock(&qhp->lock); + spin_unlock(&schp->lock); + if (flushed) { + spin_lock(&schp->comp_handler_lock); + (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); + spin_unlock(&schp->comp_handler_lock); + } + + /* deref */ + if (atomic_dec_and_test(&qhp->refcnt)) + wake_up(&qhp->wait); + + spin_lock(&qhp->lock); +} + +static void flush_qp(struct iwch_qp *qhp) +{ + struct iwch_cq *rchp, *schp; + + rchp = get_chp(qhp->rhp, qhp->attr.rcq); + schp = get_chp(qhp->rhp, qhp->attr.scq); + + if (qhp->ibqp.uobject) { + cxio_set_wq_in_error(&qhp->wq); + cxio_set_cq_in_error(&rchp->cq); + spin_lock(&rchp->comp_handler_lock); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock(&rchp->comp_handler_lock); + if (schp != rchp) { + cxio_set_cq_in_error(&schp->cq); + spin_lock(&schp->comp_handler_lock); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock(&schp->comp_handler_lock); + } + return; + } + __flush_qp(qhp, rchp, schp); +} + + +/* + * Return count of RECV WRs posted + */ +u16 iwch_rqes_posted(struct iwch_qp *qhp) +{ + union t3_wr *wqe = qhp->wq.queue; + u16 count = 0; + while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { + count++; + wqe++; + } + PDBG("%s qhp %p count %u\n", __func__, qhp, count); + return count; +} + +static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, + enum iwch_qp_attr_mask mask, + struct iwch_qp_attributes *attrs) +{ + struct t3_rdma_init_attr init_attr; + int ret; + + init_attr.tid = qhp->ep->hwtid; + init_attr.qpid = qhp->wq.qpid; + init_attr.pdid = qhp->attr.pd; + init_attr.scqid = qhp->attr.scq; + init_attr.rcqid = qhp->attr.rcq; + init_attr.rq_addr = qhp->wq.rq_addr; + init_attr.rq_size = 1 << qhp->wq.rq_size_log2; + init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE | + qhp->attr.mpa_attr.recv_marker_enabled | + (qhp->attr.mpa_attr.xmit_marker_enabled << 1) | + (qhp->attr.mpa_attr.crc_enabled << 2); + + init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE | + uP_RI_QP_RDMA_WRITE_ENABLE | + uP_RI_QP_BIND_ENABLE; + if (!qhp->ibqp.uobject) + init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE | + uP_RI_QP_FAST_REGISTER_ENABLE; + + init_attr.tcp_emss = qhp->ep->emss; + init_attr.ord = qhp->attr.max_ord; + init_attr.ird = qhp->attr.max_ird; + init_attr.qp_dma_addr = qhp->wq.dma_addr; + init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); + init_attr.rqe_count = iwch_rqes_posted(qhp); + init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0; + init_attr.chan = qhp->ep->l2t->smt_idx; + if (peer2peer) { + init_attr.rtr_type = RTR_READ; + if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) + init_attr.ord = 1; + if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator) + init_attr.ird = 1; + } else + init_attr.rtr_type = 0; + init_attr.irs = qhp->ep->rcv_seq; + PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d " + "flags 0x%x qpcaps 0x%x\n", __func__, + init_attr.rq_addr, init_attr.rq_size, + init_attr.flags, init_attr.qpcaps); + ret = cxio_rdma_init(&rhp->rdev, &init_attr); + PDBG("%s ret %d\n", __func__, ret); + return ret; +} + +int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, + enum iwch_qp_attr_mask mask, + struct iwch_qp_attributes *attrs, + int internal) +{ + int ret = 0; + struct iwch_qp_attributes newattr = qhp->attr; + unsigned long flag; + int disconnect = 0; + int terminate = 0; + int abort = 0; + int free = 0; + struct iwch_ep *ep = NULL; + + PDBG("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__, + qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state, + (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1); + + spin_lock_irqsave(&qhp->lock, flag); + + /* Process attr changes if in IDLE */ + if (mask & IWCH_QP_ATTR_VALID_MODIFY) { + if (qhp->attr.state != IWCH_QP_STATE_IDLE) { + ret = -EIO; + goto out; + } + if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ) + newattr.enable_rdma_read = attrs->enable_rdma_read; + if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE) + newattr.enable_rdma_write = attrs->enable_rdma_write; + if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND) + newattr.enable_bind = attrs->enable_bind; + if (mask & IWCH_QP_ATTR_MAX_ORD) { + if (attrs->max_ord > + rhp->attr.max_rdma_read_qp_depth) { + ret = -EINVAL; + goto out; + } + newattr.max_ord = attrs->max_ord; + } + if (mask & IWCH_QP_ATTR_MAX_IRD) { + if (attrs->max_ird > + rhp->attr.max_rdma_reads_per_qp) { + ret = -EINVAL; + goto out; + } + newattr.max_ird = attrs->max_ird; + } + qhp->attr = newattr; + } + + if (!(mask & IWCH_QP_ATTR_NEXT_STATE)) + goto out; + if (qhp->attr.state == attrs->next_state) + goto out; + + switch (qhp->attr.state) { + case IWCH_QP_STATE_IDLE: + switch (attrs->next_state) { + case IWCH_QP_STATE_RTS: + if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) { + ret = -EINVAL; + goto out; + } + if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) { + ret = -EINVAL; + goto out; + } + qhp->attr.mpa_attr = attrs->mpa_attr; + qhp->attr.llp_stream_handle = attrs->llp_stream_handle; + qhp->ep = qhp->attr.llp_stream_handle; + qhp->attr.state = IWCH_QP_STATE_RTS; + + /* + * Ref the endpoint here and deref when we + * disassociate the endpoint from the QP. This + * happens in CLOSING->IDLE transition or *->ERROR + * transition. + */ + get_ep(&qhp->ep->com); + spin_unlock_irqrestore(&qhp->lock, flag); + ret = rdma_init(rhp, qhp, mask, attrs); + spin_lock_irqsave(&qhp->lock, flag); + if (ret) + goto err; + break; + case IWCH_QP_STATE_ERROR: + qhp->attr.state = IWCH_QP_STATE_ERROR; + flush_qp(qhp); + break; + default: + ret = -EINVAL; + goto out; + } + break; + case IWCH_QP_STATE_RTS: + switch (attrs->next_state) { + case IWCH_QP_STATE_CLOSING: + BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); + qhp->attr.state = IWCH_QP_STATE_CLOSING; + if (!internal) { + abort=0; + disconnect = 1; + ep = qhp->ep; + get_ep(&ep->com); + } + break; + case IWCH_QP_STATE_TERMINATE: + qhp->attr.state = IWCH_QP_STATE_TERMINATE; + if (qhp->ibqp.uobject) + cxio_set_wq_in_error(&qhp->wq); + if (!internal) + terminate = 1; + break; + case IWCH_QP_STATE_ERROR: + qhp->attr.state = IWCH_QP_STATE_ERROR; + if (!internal) { + abort=1; + disconnect = 1; + ep = qhp->ep; + get_ep(&ep->com); + } + goto err; + break; + default: + ret = -EINVAL; + goto out; + } + break; + case IWCH_QP_STATE_CLOSING: + if (!internal) { + ret = -EINVAL; + goto out; + } + switch (attrs->next_state) { + case IWCH_QP_STATE_IDLE: + flush_qp(qhp); + qhp->attr.state = IWCH_QP_STATE_IDLE; + qhp->attr.llp_stream_handle = NULL; + put_ep(&qhp->ep->com); + qhp->ep = NULL; + wake_up(&qhp->wait); + break; + case IWCH_QP_STATE_ERROR: + goto err; + default: + ret = -EINVAL; + goto err; + } + break; + case IWCH_QP_STATE_ERROR: + if (attrs->next_state != IWCH_QP_STATE_IDLE) { + ret = -EINVAL; + goto out; + } + + if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) || + !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) { + ret = -EINVAL; + goto out; + } + qhp->attr.state = IWCH_QP_STATE_IDLE; + break; + case IWCH_QP_STATE_TERMINATE: + if (!internal) { + ret = -EINVAL; + goto out; + } + goto err; + break; + default: + printk(KERN_ERR "%s in a bad state %d\n", + __func__, qhp->attr.state); + ret = -EINVAL; + goto err; + break; + } + goto out; +err: + PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep, + qhp->wq.qpid); + + /* disassociate the LLP connection */ + qhp->attr.llp_stream_handle = NULL; + ep = qhp->ep; + qhp->ep = NULL; + qhp->attr.state = IWCH_QP_STATE_ERROR; + free=1; + wake_up(&qhp->wait); + BUG_ON(!ep); + flush_qp(qhp); +out: + spin_unlock_irqrestore(&qhp->lock, flag); + + if (terminate) + iwch_post_terminate(qhp, NULL); + + /* + * If disconnect is 1, then we need to initiate a disconnect + * on the EP. This can be a normal close (RTS->CLOSING) or + * an abnormal close (RTS/CLOSING->ERROR). + */ + if (disconnect) { + iwch_ep_disconnect(ep, abort, GFP_KERNEL); + put_ep(&ep->com); + } + + /* + * If free is 1, then we've disassociated the EP from the QP + * and we need to dereference the EP. + */ + if (free) + put_ep(&ep->com); + + PDBG("%s exit state %d\n", __func__, qhp->attr.state); + return ret; +} diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h new file mode 100644 index 00000000..a277c31f --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/iwch_user.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __IWCH_USER_H__ +#define __IWCH_USER_H__ + +#define IWCH_UVERBS_ABI_VERSION 1 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ +struct iwch_create_cq_req { + __u64 user_rptr_addr; +}; + +struct iwch_create_cq_resp_v0 { + __u64 key; + __u32 cqid; + __u32 size_log2; +}; + +struct iwch_create_cq_resp { + __u64 key; + __u32 cqid; + __u32 size_log2; + __u32 memsize; + __u32 reserved; +}; + +struct iwch_create_qp_resp { + __u64 key; + __u64 db_key; + __u32 qpid; + __u32 size_log2; + __u32 sq_size_log2; + __u32 rq_size_log2; +}; + +struct iwch_reg_user_mr_resp { + __u32 pbl_addr; +}; +#endif diff --git a/drivers/infiniband/hw/cxgb3/tcb.h b/drivers/infiniband/hw/cxgb3/tcb.h new file mode 100644 index 00000000..c702dc19 --- /dev/null +++ b/drivers/infiniband/hw/cxgb3/tcb.h @@ -0,0 +1,632 @@ +/* + * Copyright (c) 2007 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _TCB_DEFS_H +#define _TCB_DEFS_H + +#define W_TCB_T_STATE 0 +#define S_TCB_T_STATE 0 +#define M_TCB_T_STATE 0xfULL +#define V_TCB_T_STATE(x) ((x) << S_TCB_T_STATE) + +#define W_TCB_TIMER 0 +#define S_TCB_TIMER 4 +#define M_TCB_TIMER 0x1ULL +#define V_TCB_TIMER(x) ((x) << S_TCB_TIMER) + +#define W_TCB_DACK_TIMER 0 +#define S_TCB_DACK_TIMER 5 +#define M_TCB_DACK_TIMER 0x1ULL +#define V_TCB_DACK_TIMER(x) ((x) << S_TCB_DACK_TIMER) + +#define W_TCB_DEL_FLAG 0 +#define S_TCB_DEL_FLAG 6 +#define M_TCB_DEL_FLAG 0x1ULL +#define V_TCB_DEL_FLAG(x) ((x) << S_TCB_DEL_FLAG) + +#define W_TCB_L2T_IX 0 +#define S_TCB_L2T_IX 7 +#define M_TCB_L2T_IX 0x7ffULL +#define V_TCB_L2T_IX(x) ((x) << S_TCB_L2T_IX) + +#define W_TCB_SMAC_SEL 0 +#define S_TCB_SMAC_SEL 18 +#define M_TCB_SMAC_SEL 0x3ULL +#define V_TCB_SMAC_SEL(x) ((x) << S_TCB_SMAC_SEL) + +#define W_TCB_TOS 0 +#define S_TCB_TOS 20 +#define M_TCB_TOS 0x3fULL +#define V_TCB_TOS(x) ((x) << S_TCB_TOS) + +#define W_TCB_MAX_RT 0 +#define S_TCB_MAX_RT 26 +#define M_TCB_MAX_RT 0xfULL +#define V_TCB_MAX_RT(x) ((x) << S_TCB_MAX_RT) + +#define W_TCB_T_RXTSHIFT 0 +#define S_TCB_T_RXTSHIFT 30 +#define M_TCB_T_RXTSHIFT 0xfULL +#define V_TCB_T_RXTSHIFT(x) ((x) << S_TCB_T_RXTSHIFT) + +#define W_TCB_T_DUPACKS 1 +#define S_TCB_T_DUPACKS 2 +#define M_TCB_T_DUPACKS 0xfULL +#define V_TCB_T_DUPACKS(x) ((x) << S_TCB_T_DUPACKS) + +#define W_TCB_T_MAXSEG 1 +#define S_TCB_T_MAXSEG 6 +#define M_TCB_T_MAXSEG 0xfULL +#define V_TCB_T_MAXSEG(x) ((x) << S_TCB_T_MAXSEG) + +#define W_TCB_T_FLAGS1 1 +#define S_TCB_T_FLAGS1 10 +#define M_TCB_T_FLAGS1 0xffffffffULL +#define V_TCB_T_FLAGS1(x) ((x) << S_TCB_T_FLAGS1) + +#define W_TCB_T_MIGRATION 1 +#define S_TCB_T_MIGRATION 20 +#define M_TCB_T_MIGRATION 0x1ULL +#define V_TCB_T_MIGRATION(x) ((x) << S_TCB_T_MIGRATION) + +#define W_TCB_T_FLAGS2 2 +#define S_TCB_T_FLAGS2 10 +#define M_TCB_T_FLAGS2 0x7fULL +#define V_TCB_T_FLAGS2(x) ((x) << S_TCB_T_FLAGS2) + +#define W_TCB_SND_SCALE 2 +#define S_TCB_SND_SCALE 17 +#define M_TCB_SND_SCALE 0xfULL +#define V_TCB_SND_SCALE(x) ((x) << S_TCB_SND_SCALE) + +#define W_TCB_RCV_SCALE 2 +#define S_TCB_RCV_SCALE 21 +#define M_TCB_RCV_SCALE 0xfULL +#define V_TCB_RCV_SCALE(x) ((x) << S_TCB_RCV_SCALE) + +#define W_TCB_SND_UNA_RAW 2 +#define S_TCB_SND_UNA_RAW 25 +#define M_TCB_SND_UNA_RAW 0x7ffffffULL +#define V_TCB_SND_UNA_RAW(x) ((x) << S_TCB_SND_UNA_RAW) + +#define W_TCB_SND_NXT_RAW 3 +#define S_TCB_SND_NXT_RAW 20 +#define M_TCB_SND_NXT_RAW 0x7ffffffULL +#define V_TCB_SND_NXT_RAW(x) ((x) << S_TCB_SND_NXT_RAW) + +#define W_TCB_RCV_NXT 4 +#define S_TCB_RCV_NXT 15 +#define M_TCB_RCV_NXT 0xffffffffULL +#define V_TCB_RCV_NXT(x) ((x) << S_TCB_RCV_NXT) + +#define W_TCB_RCV_ADV 5 +#define S_TCB_RCV_ADV 15 +#define M_TCB_RCV_ADV 0xffffULL +#define V_TCB_RCV_ADV(x) ((x) << S_TCB_RCV_ADV) + +#define W_TCB_SND_MAX_RAW 5 +#define S_TCB_SND_MAX_RAW 31 +#define M_TCB_SND_MAX_RAW 0x7ffffffULL +#define V_TCB_SND_MAX_RAW(x) ((x) << S_TCB_SND_MAX_RAW) + +#define W_TCB_SND_CWND 6 +#define S_TCB_SND_CWND 26 +#define M_TCB_SND_CWND 0x7ffffffULL +#define V_TCB_SND_CWND(x) ((x) << S_TCB_SND_CWND) + +#define W_TCB_SND_SSTHRESH 7 +#define S_TCB_SND_SSTHRESH 21 +#define M_TCB_SND_SSTHRESH 0x7ffffffULL +#define V_TCB_SND_SSTHRESH(x) ((x) << S_TCB_SND_SSTHRESH) + +#define W_TCB_T_RTT_TS_RECENT_AGE 8 +#define S_TCB_T_RTT_TS_RECENT_AGE 16 +#define M_TCB_T_RTT_TS_RECENT_AGE 0xffffffffULL +#define V_TCB_T_RTT_TS_RECENT_AGE(x) ((x) << S_TCB_T_RTT_TS_RECENT_AGE) + +#define W_TCB_T_RTSEQ_RECENT 9 +#define S_TCB_T_RTSEQ_RECENT 16 +#define M_TCB_T_RTSEQ_RECENT 0xffffffffULL +#define V_TCB_T_RTSEQ_RECENT(x) ((x) << S_TCB_T_RTSEQ_RECENT) + +#define W_TCB_T_SRTT 10 +#define S_TCB_T_SRTT 16 +#define M_TCB_T_SRTT 0xffffULL +#define V_TCB_T_SRTT(x) ((x) << S_TCB_T_SRTT) + +#define W_TCB_T_RTTVAR 11 +#define S_TCB_T_RTTVAR 0 +#define M_TCB_T_RTTVAR 0xffffULL +#define V_TCB_T_RTTVAR(x) ((x) << S_TCB_T_RTTVAR) + +#define W_TCB_TS_LAST_ACK_SENT_RAW 11 +#define S_TCB_TS_LAST_ACK_SENT_RAW 16 +#define M_TCB_TS_LAST_ACK_SENT_RAW 0x7ffffffULL +#define V_TCB_TS_LAST_ACK_SENT_RAW(x) ((x) << S_TCB_TS_LAST_ACK_SENT_RAW) + +#define W_TCB_DIP 12 +#define S_TCB_DIP 11 +#define M_TCB_DIP 0xffffffffULL +#define V_TCB_DIP(x) ((x) << S_TCB_DIP) + +#define W_TCB_SIP 13 +#define S_TCB_SIP 11 +#define M_TCB_SIP 0xffffffffULL +#define V_TCB_SIP(x) ((x) << S_TCB_SIP) + +#define W_TCB_DP 14 +#define S_TCB_DP 11 +#define M_TCB_DP 0xffffULL +#define V_TCB_DP(x) ((x) << S_TCB_DP) + +#define W_TCB_SP 14 +#define S_TCB_SP 27 +#define M_TCB_SP 0xffffULL +#define V_TCB_SP(x) ((x) << S_TCB_SP) + +#define W_TCB_TIMESTAMP 15 +#define S_TCB_TIMESTAMP 11 +#define M_TCB_TIMESTAMP 0xffffffffULL +#define V_TCB_TIMESTAMP(x) ((x) << S_TCB_TIMESTAMP) + +#define W_TCB_TIMESTAMP_OFFSET 16 +#define S_TCB_TIMESTAMP_OFFSET 11 +#define M_TCB_TIMESTAMP_OFFSET 0xfULL +#define V_TCB_TIMESTAMP_OFFSET(x) ((x) << S_TCB_TIMESTAMP_OFFSET) + +#define W_TCB_TX_MAX 16 +#define S_TCB_TX_MAX 15 +#define M_TCB_TX_MAX 0xffffffffULL +#define V_TCB_TX_MAX(x) ((x) << S_TCB_TX_MAX) + +#define W_TCB_TX_HDR_PTR_RAW 17 +#define S_TCB_TX_HDR_PTR_RAW 15 +#define M_TCB_TX_HDR_PTR_RAW 0x1ffffULL +#define V_TCB_TX_HDR_PTR_RAW(x) ((x) << S_TCB_TX_HDR_PTR_RAW) + +#define W_TCB_TX_LAST_PTR_RAW 18 +#define S_TCB_TX_LAST_PTR_RAW 0 +#define M_TCB_TX_LAST_PTR_RAW 0x1ffffULL +#define V_TCB_TX_LAST_PTR_RAW(x) ((x) << S_TCB_TX_LAST_PTR_RAW) + +#define W_TCB_TX_COMPACT 18 +#define S_TCB_TX_COMPACT 17 +#define M_TCB_TX_COMPACT 0x1ULL +#define V_TCB_TX_COMPACT(x) ((x) << S_TCB_TX_COMPACT) + +#define W_TCB_RX_COMPACT 18 +#define S_TCB_RX_COMPACT 18 +#define M_TCB_RX_COMPACT 0x1ULL +#define V_TCB_RX_COMPACT(x) ((x) << S_TCB_RX_COMPACT) + +#define W_TCB_RCV_WND 18 +#define S_TCB_RCV_WND 19 +#define M_TCB_RCV_WND 0x7ffffffULL +#define V_TCB_RCV_WND(x) ((x) << S_TCB_RCV_WND) + +#define W_TCB_RX_HDR_OFFSET 19 +#define S_TCB_RX_HDR_OFFSET 14 +#define M_TCB_RX_HDR_OFFSET 0x7ffffffULL +#define V_TCB_RX_HDR_OFFSET(x) ((x) << S_TCB_RX_HDR_OFFSET) + +#define W_TCB_RX_FRAG0_START_IDX_RAW 20 +#define S_TCB_RX_FRAG0_START_IDX_RAW 9 +#define M_TCB_RX_FRAG0_START_IDX_RAW 0x7ffffffULL +#define V_TCB_RX_FRAG0_START_IDX_RAW(x) ((x) << S_TCB_RX_FRAG0_START_IDX_RAW) + +#define W_TCB_RX_FRAG1_START_IDX_OFFSET 21 +#define S_TCB_RX_FRAG1_START_IDX_OFFSET 4 +#define M_TCB_RX_FRAG1_START_IDX_OFFSET 0x7ffffffULL +#define V_TCB_RX_FRAG1_START_IDX_OFFSET(x) ((x) << S_TCB_RX_FRAG1_START_IDX_OFFSET) + +#define W_TCB_RX_FRAG0_LEN 21 +#define S_TCB_RX_FRAG0_LEN 31 +#define M_TCB_RX_FRAG0_LEN 0x7ffffffULL +#define V_TCB_RX_FRAG0_LEN(x) ((x) << S_TCB_RX_FRAG0_LEN) + +#define W_TCB_RX_FRAG1_LEN 22 +#define S_TCB_RX_FRAG1_LEN 26 +#define M_TCB_RX_FRAG1_LEN 0x7ffffffULL +#define V_TCB_RX_FRAG1_LEN(x) ((x) << S_TCB_RX_FRAG1_LEN) + +#define W_TCB_NEWRENO_RECOVER 23 +#define S_TCB_NEWRENO_RECOVER 21 +#define M_TCB_NEWRENO_RECOVER 0x7ffffffULL +#define V_TCB_NEWRENO_RECOVER(x) ((x) << S_TCB_NEWRENO_RECOVER) + +#define W_TCB_PDU_HAVE_LEN 24 +#define S_TCB_PDU_HAVE_LEN 16 +#define M_TCB_PDU_HAVE_LEN 0x1ULL +#define V_TCB_PDU_HAVE_LEN(x) ((x) << S_TCB_PDU_HAVE_LEN) + +#define W_TCB_PDU_LEN 24 +#define S_TCB_PDU_LEN 17 +#define M_TCB_PDU_LEN 0xffffULL +#define V_TCB_PDU_LEN(x) ((x) << S_TCB_PDU_LEN) + +#define W_TCB_RX_QUIESCE 25 +#define S_TCB_RX_QUIESCE 1 +#define M_TCB_RX_QUIESCE 0x1ULL +#define V_TCB_RX_QUIESCE(x) ((x) << S_TCB_RX_QUIESCE) + +#define W_TCB_RX_PTR_RAW 25 +#define S_TCB_RX_PTR_RAW 2 +#define M_TCB_RX_PTR_RAW 0x1ffffULL +#define V_TCB_RX_PTR_RAW(x) ((x) << S_TCB_RX_PTR_RAW) + +#define W_TCB_CPU_NO 25 +#define S_TCB_CPU_NO 19 +#define M_TCB_CPU_NO 0x7fULL +#define V_TCB_CPU_NO(x) ((x) << S_TCB_CPU_NO) + +#define W_TCB_ULP_TYPE 25 +#define S_TCB_ULP_TYPE 26 +#define M_TCB_ULP_TYPE 0xfULL +#define V_TCB_ULP_TYPE(x) ((x) << S_TCB_ULP_TYPE) + +#define W_TCB_RX_FRAG1_PTR_RAW 25 +#define S_TCB_RX_FRAG1_PTR_RAW 30 +#define M_TCB_RX_FRAG1_PTR_RAW 0x1ffffULL +#define V_TCB_RX_FRAG1_PTR_RAW(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW) + +#define W_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 26 +#define S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 15 +#define M_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 0x7ffffffULL +#define V_TCB_RX_FRAG2_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW) + +#define W_TCB_RX_FRAG2_PTR_RAW 27 +#define S_TCB_RX_FRAG2_PTR_RAW 10 +#define M_TCB_RX_FRAG2_PTR_RAW 0x1ffffULL +#define V_TCB_RX_FRAG2_PTR_RAW(x) ((x) << S_TCB_RX_FRAG2_PTR_RAW) + +#define W_TCB_RX_FRAG2_LEN_RAW 27 +#define S_TCB_RX_FRAG2_LEN_RAW 27 +#define M_TCB_RX_FRAG2_LEN_RAW 0x7ffffffULL +#define V_TCB_RX_FRAG2_LEN_RAW(x) ((x) << S_TCB_RX_FRAG2_LEN_RAW) + +#define W_TCB_RX_FRAG3_PTR_RAW 28 +#define S_TCB_RX_FRAG3_PTR_RAW 22 +#define M_TCB_RX_FRAG3_PTR_RAW 0x1ffffULL +#define V_TCB_RX_FRAG3_PTR_RAW(x) ((x) << S_TCB_RX_FRAG3_PTR_RAW) + +#define W_TCB_RX_FRAG3_LEN_RAW 29 +#define S_TCB_RX_FRAG3_LEN_RAW 7 +#define M_TCB_RX_FRAG3_LEN_RAW 0x7ffffffULL +#define V_TCB_RX_FRAG3_LEN_RAW(x) ((x) << S_TCB_RX_FRAG3_LEN_RAW) + +#define W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 30 +#define S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 2 +#define M_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 0x7ffffffULL +#define V_TCB_RX_FRAG3_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW) + +#define W_TCB_PDU_HDR_LEN 30 +#define S_TCB_PDU_HDR_LEN 29 +#define M_TCB_PDU_HDR_LEN 0xffULL +#define V_TCB_PDU_HDR_LEN(x) ((x) << S_TCB_PDU_HDR_LEN) + +#define W_TCB_SLUSH1 31 +#define S_TCB_SLUSH1 5 +#define M_TCB_SLUSH1 0x7ffffULL +#define V_TCB_SLUSH1(x) ((x) << S_TCB_SLUSH1) + +#define W_TCB_ULP_RAW 31 +#define S_TCB_ULP_RAW 24 +#define M_TCB_ULP_RAW 0xffULL +#define V_TCB_ULP_RAW(x) ((x) << S_TCB_ULP_RAW) + +#define W_TCB_DDP_RDMAP_VERSION 25 +#define S_TCB_DDP_RDMAP_VERSION 30 +#define M_TCB_DDP_RDMAP_VERSION 0x1ULL +#define V_TCB_DDP_RDMAP_VERSION(x) ((x) << S_TCB_DDP_RDMAP_VERSION) + +#define W_TCB_MARKER_ENABLE_RX 25 +#define S_TCB_MARKER_ENABLE_RX 31 +#define M_TCB_MARKER_ENABLE_RX 0x1ULL +#define V_TCB_MARKER_ENABLE_RX(x) ((x) << S_TCB_MARKER_ENABLE_RX) + +#define W_TCB_MARKER_ENABLE_TX 26 +#define S_TCB_MARKER_ENABLE_TX 0 +#define M_TCB_MARKER_ENABLE_TX 0x1ULL +#define V_TCB_MARKER_ENABLE_TX(x) ((x) << S_TCB_MARKER_ENABLE_TX) + +#define W_TCB_CRC_ENABLE 26 +#define S_TCB_CRC_ENABLE 1 +#define M_TCB_CRC_ENABLE 0x1ULL +#define V_TCB_CRC_ENABLE(x) ((x) << S_TCB_CRC_ENABLE) + +#define W_TCB_IRS_ULP 26 +#define S_TCB_IRS_ULP 2 +#define M_TCB_IRS_ULP 0x1ffULL +#define V_TCB_IRS_ULP(x) ((x) << S_TCB_IRS_ULP) + +#define W_TCB_ISS_ULP 26 +#define S_TCB_ISS_ULP 11 +#define M_TCB_ISS_ULP 0x1ffULL +#define V_TCB_ISS_ULP(x) ((x) << S_TCB_ISS_ULP) + +#define W_TCB_TX_PDU_LEN 26 +#define S_TCB_TX_PDU_LEN 20 +#define M_TCB_TX_PDU_LEN 0x3fffULL +#define V_TCB_TX_PDU_LEN(x) ((x) << S_TCB_TX_PDU_LEN) + +#define W_TCB_TX_PDU_OUT 27 +#define S_TCB_TX_PDU_OUT 2 +#define M_TCB_TX_PDU_OUT 0x1ULL +#define V_TCB_TX_PDU_OUT(x) ((x) << S_TCB_TX_PDU_OUT) + +#define W_TCB_CQ_IDX_SQ 27 +#define S_TCB_CQ_IDX_SQ 3 +#define M_TCB_CQ_IDX_SQ 0xffffULL +#define V_TCB_CQ_IDX_SQ(x) ((x) << S_TCB_CQ_IDX_SQ) + +#define W_TCB_CQ_IDX_RQ 27 +#define S_TCB_CQ_IDX_RQ 19 +#define M_TCB_CQ_IDX_RQ 0xffffULL +#define V_TCB_CQ_IDX_RQ(x) ((x) << S_TCB_CQ_IDX_RQ) + +#define W_TCB_QP_ID 28 +#define S_TCB_QP_ID 3 +#define M_TCB_QP_ID 0xffffULL +#define V_TCB_QP_ID(x) ((x) << S_TCB_QP_ID) + +#define W_TCB_PD_ID 28 +#define S_TCB_PD_ID 19 +#define M_TCB_PD_ID 0xffffULL +#define V_TCB_PD_ID(x) ((x) << S_TCB_PD_ID) + +#define W_TCB_STAG 29 +#define S_TCB_STAG 3 +#define M_TCB_STAG 0xffffffffULL +#define V_TCB_STAG(x) ((x) << S_TCB_STAG) + +#define W_TCB_RQ_START 30 +#define S_TCB_RQ_START 3 +#define M_TCB_RQ_START 0x3ffffffULL +#define V_TCB_RQ_START(x) ((x) << S_TCB_RQ_START) + +#define W_TCB_RQ_MSN 30 +#define S_TCB_RQ_MSN 29 +#define M_TCB_RQ_MSN 0x3ffULL +#define V_TCB_RQ_MSN(x) ((x) << S_TCB_RQ_MSN) + +#define W_TCB_RQ_MAX_OFFSET 31 +#define S_TCB_RQ_MAX_OFFSET 7 +#define M_TCB_RQ_MAX_OFFSET 0xfULL +#define V_TCB_RQ_MAX_OFFSET(x) ((x) << S_TCB_RQ_MAX_OFFSET) + +#define W_TCB_RQ_WRITE_PTR 31 +#define S_TCB_RQ_WRITE_PTR 11 +#define M_TCB_RQ_WRITE_PTR 0x3ffULL +#define V_TCB_RQ_WRITE_PTR(x) ((x) << S_TCB_RQ_WRITE_PTR) + +#define W_TCB_INB_WRITE_PERM 31 +#define S_TCB_INB_WRITE_PERM 21 +#define M_TCB_INB_WRITE_PERM 0x1ULL +#define V_TCB_INB_WRITE_PERM(x) ((x) << S_TCB_INB_WRITE_PERM) + +#define W_TCB_INB_READ_PERM 31 +#define S_TCB_INB_READ_PERM 22 +#define M_TCB_INB_READ_PERM 0x1ULL +#define V_TCB_INB_READ_PERM(x) ((x) << S_TCB_INB_READ_PERM) + +#define W_TCB_ORD_L_BIT_VLD 31 +#define S_TCB_ORD_L_BIT_VLD 23 +#define M_TCB_ORD_L_BIT_VLD 0x1ULL +#define V_TCB_ORD_L_BIT_VLD(x) ((x) << S_TCB_ORD_L_BIT_VLD) + +#define W_TCB_RDMAP_OPCODE 31 +#define S_TCB_RDMAP_OPCODE 24 +#define M_TCB_RDMAP_OPCODE 0xfULL +#define V_TCB_RDMAP_OPCODE(x) ((x) << S_TCB_RDMAP_OPCODE) + +#define W_TCB_TX_FLUSH 31 +#define S_TCB_TX_FLUSH 28 +#define M_TCB_TX_FLUSH 0x1ULL +#define V_TCB_TX_FLUSH(x) ((x) << S_TCB_TX_FLUSH) + +#define W_TCB_TX_OOS_RXMT 31 +#define S_TCB_TX_OOS_RXMT 29 +#define M_TCB_TX_OOS_RXMT 0x1ULL +#define V_TCB_TX_OOS_RXMT(x) ((x) << S_TCB_TX_OOS_RXMT) + +#define W_TCB_TX_OOS_TXMT 31 +#define S_TCB_TX_OOS_TXMT 30 +#define M_TCB_TX_OOS_TXMT 0x1ULL +#define V_TCB_TX_OOS_TXMT(x) ((x) << S_TCB_TX_OOS_TXMT) + +#define W_TCB_SLUSH_AUX2 31 +#define S_TCB_SLUSH_AUX2 31 +#define M_TCB_SLUSH_AUX2 0x1ULL +#define V_TCB_SLUSH_AUX2(x) ((x) << S_TCB_SLUSH_AUX2) + +#define W_TCB_RX_FRAG1_PTR_RAW2 25 +#define S_TCB_RX_FRAG1_PTR_RAW2 30 +#define M_TCB_RX_FRAG1_PTR_RAW2 0x1ffffULL +#define V_TCB_RX_FRAG1_PTR_RAW2(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW2) + +#define W_TCB_RX_DDP_FLAGS 26 +#define S_TCB_RX_DDP_FLAGS 15 +#define M_TCB_RX_DDP_FLAGS 0x3ffULL +#define V_TCB_RX_DDP_FLAGS(x) ((x) << S_TCB_RX_DDP_FLAGS) + +#define W_TCB_SLUSH_AUX3 26 +#define S_TCB_SLUSH_AUX3 31 +#define M_TCB_SLUSH_AUX3 0x1ffULL +#define V_TCB_SLUSH_AUX3(x) ((x) << S_TCB_SLUSH_AUX3) + +#define W_TCB_RX_DDP_BUF0_OFFSET 27 +#define S_TCB_RX_DDP_BUF0_OFFSET 8 +#define M_TCB_RX_DDP_BUF0_OFFSET 0x3fffffULL +#define V_TCB_RX_DDP_BUF0_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF0_OFFSET) + +#define W_TCB_RX_DDP_BUF0_LEN 27 +#define S_TCB_RX_DDP_BUF0_LEN 30 +#define M_TCB_RX_DDP_BUF0_LEN 0x3fffffULL +#define V_TCB_RX_DDP_BUF0_LEN(x) ((x) << S_TCB_RX_DDP_BUF0_LEN) + +#define W_TCB_RX_DDP_BUF1_OFFSET 28 +#define S_TCB_RX_DDP_BUF1_OFFSET 20 +#define M_TCB_RX_DDP_BUF1_OFFSET 0x3fffffULL +#define V_TCB_RX_DDP_BUF1_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF1_OFFSET) + +#define W_TCB_RX_DDP_BUF1_LEN 29 +#define S_TCB_RX_DDP_BUF1_LEN 10 +#define M_TCB_RX_DDP_BUF1_LEN 0x3fffffULL +#define V_TCB_RX_DDP_BUF1_LEN(x) ((x) << S_TCB_RX_DDP_BUF1_LEN) + +#define W_TCB_RX_DDP_BUF0_TAG 30 +#define S_TCB_RX_DDP_BUF0_TAG 0 +#define M_TCB_RX_DDP_BUF0_TAG 0xffffffffULL +#define V_TCB_RX_DDP_BUF0_TAG(x) ((x) << S_TCB_RX_DDP_BUF0_TAG) + +#define W_TCB_RX_DDP_BUF1_TAG 31 +#define S_TCB_RX_DDP_BUF1_TAG 0 +#define M_TCB_RX_DDP_BUF1_TAG 0xffffffffULL +#define V_TCB_RX_DDP_BUF1_TAG(x) ((x) << S_TCB_RX_DDP_BUF1_TAG) + +#define S_TF_DACK 10 +#define V_TF_DACK(x) ((x) << S_TF_DACK) + +#define S_TF_NAGLE 11 +#define V_TF_NAGLE(x) ((x) << S_TF_NAGLE) + +#define S_TF_RECV_SCALE 12 +#define V_TF_RECV_SCALE(x) ((x) << S_TF_RECV_SCALE) + +#define S_TF_RECV_TSTMP 13 +#define V_TF_RECV_TSTMP(x) ((x) << S_TF_RECV_TSTMP) + +#define S_TF_RECV_SACK 14 +#define V_TF_RECV_SACK(x) ((x) << S_TF_RECV_SACK) + +#define S_TF_TURBO 15 +#define V_TF_TURBO(x) ((x) << S_TF_TURBO) + +#define S_TF_KEEPALIVE 16 +#define V_TF_KEEPALIVE(x) ((x) << S_TF_KEEPALIVE) + +#define S_TF_TCAM_BYPASS 17 +#define V_TF_TCAM_BYPASS(x) ((x) << S_TF_TCAM_BYPASS) + +#define S_TF_CORE_FIN 18 +#define V_TF_CORE_FIN(x) ((x) << S_TF_CORE_FIN) + +#define S_TF_CORE_MORE 19 +#define V_TF_CORE_MORE(x) ((x) << S_TF_CORE_MORE) + +#define S_TF_MIGRATING 20 +#define V_TF_MIGRATING(x) ((x) << S_TF_MIGRATING) + +#define S_TF_ACTIVE_OPEN 21 +#define V_TF_ACTIVE_OPEN(x) ((x) << S_TF_ACTIVE_OPEN) + +#define S_TF_ASK_MODE 22 +#define V_TF_ASK_MODE(x) ((x) << S_TF_ASK_MODE) + +#define S_TF_NON_OFFLOAD 23 +#define V_TF_NON_OFFLOAD(x) ((x) << S_TF_NON_OFFLOAD) + +#define S_TF_MOD_SCHD 24 +#define V_TF_MOD_SCHD(x) ((x) << S_TF_MOD_SCHD) + +#define S_TF_MOD_SCHD_REASON0 25 +#define V_TF_MOD_SCHD_REASON0(x) ((x) << S_TF_MOD_SCHD_REASON0) + +#define S_TF_MOD_SCHD_REASON1 26 +#define V_TF_MOD_SCHD_REASON1(x) ((x) << S_TF_MOD_SCHD_REASON1) + +#define S_TF_MOD_SCHD_RX 27 +#define V_TF_MOD_SCHD_RX(x) ((x) << S_TF_MOD_SCHD_RX) + +#define S_TF_CORE_PUSH 28 +#define V_TF_CORE_PUSH(x) ((x) << S_TF_CORE_PUSH) + +#define S_TF_RCV_COALESCE_ENABLE 29 +#define V_TF_RCV_COALESCE_ENABLE(x) ((x) << S_TF_RCV_COALESCE_ENABLE) + +#define S_TF_RCV_COALESCE_PUSH 30 +#define V_TF_RCV_COALESCE_PUSH(x) ((x) << S_TF_RCV_COALESCE_PUSH) + +#define S_TF_RCV_COALESCE_LAST_PSH 31 +#define V_TF_RCV_COALESCE_LAST_PSH(x) ((x) << S_TF_RCV_COALESCE_LAST_PSH) + +#define S_TF_RCV_COALESCE_HEARTBEAT 32 +#define V_TF_RCV_COALESCE_HEARTBEAT(x) ((x) << S_TF_RCV_COALESCE_HEARTBEAT) + +#define S_TF_HALF_CLOSE 33 +#define V_TF_HALF_CLOSE(x) ((x) << S_TF_HALF_CLOSE) + +#define S_TF_DACK_MSS 34 +#define V_TF_DACK_MSS(x) ((x) << S_TF_DACK_MSS) + +#define S_TF_CCTRL_SEL0 35 +#define V_TF_CCTRL_SEL0(x) ((x) << S_TF_CCTRL_SEL0) + +#define S_TF_CCTRL_SEL1 36 +#define V_TF_CCTRL_SEL1(x) ((x) << S_TF_CCTRL_SEL1) + +#define S_TF_TCP_NEWRENO_FAST_RECOVERY 37 +#define V_TF_TCP_NEWRENO_FAST_RECOVERY(x) ((x) << S_TF_TCP_NEWRENO_FAST_RECOVERY) + +#define S_TF_TX_PACE_AUTO 38 +#define V_TF_TX_PACE_AUTO(x) ((x) << S_TF_TX_PACE_AUTO) + +#define S_TF_PEER_FIN_HELD 39 +#define V_TF_PEER_FIN_HELD(x) ((x) << S_TF_PEER_FIN_HELD) + +#define S_TF_CORE_URG 40 +#define V_TF_CORE_URG(x) ((x) << S_TF_CORE_URG) + +#define S_TF_RDMA_ERROR 41 +#define V_TF_RDMA_ERROR(x) ((x) << S_TF_RDMA_ERROR) + +#define S_TF_SSWS_DISABLED 42 +#define V_TF_SSWS_DISABLED(x) ((x) << S_TF_SSWS_DISABLED) + +#define S_TF_DUPACK_COUNT_ODD 43 +#define V_TF_DUPACK_COUNT_ODD(x) ((x) << S_TF_DUPACK_COUNT_ODD) + +#define S_TF_TX_CHANNEL 44 +#define V_TF_TX_CHANNEL(x) ((x) << S_TF_TX_CHANNEL) + +#define S_TF_RX_CHANNEL 45 +#define V_TF_RX_CHANNEL(x) ((x) << S_TF_RX_CHANNEL) + +#define S_TF_TX_PACE_FIXED 46 +#define V_TF_TX_PACE_FIXED(x) ((x) << S_TF_TX_PACE_FIXED) + +#define S_TF_RDMA_FLM_ERROR 47 +#define V_TF_RDMA_FLM_ERROR(x) ((x) << S_TF_RDMA_FLM_ERROR) + +#define S_TF_RX_FLOW_CONTROL_DISABLE 48 +#define V_TF_RX_FLOW_CONTROL_DISABLE(x) ((x) << S_TF_RX_FLOW_CONTROL_DISABLE) + +#endif /* _TCB_DEFS_H */ diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig new file mode 100644 index 00000000..6b7e6c54 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/Kconfig @@ -0,0 +1,18 @@ +config INFINIBAND_CXGB4 + tristate "Chelsio T4 RDMA Driver" + depends on CHELSIO_T4 && INET + select GENERIC_ALLOCATOR + ---help--- + This is an iWARP/RDMA driver for the Chelsio T4 1GbE and + 10GbE adapters. + + For general information about Chelsio and our products, visit + our website at . + + For customer support, please visit our customer support page at + . + + Please send feedback to . + + To compile this driver as a module, choose M here: the module + will be called iw_cxgb4. diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile new file mode 100644 index 00000000..46b878ca --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/Makefile @@ -0,0 +1,5 @@ +ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 + +obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o + +iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c new file mode 100644 index 00000000..4c7c62fe --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -0,0 +1,2722 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "iw_cxgb4.h" + +static char *states[] = { + "idle", + "listen", + "connecting", + "mpa_wait_req", + "mpa_req_sent", + "mpa_req_rcvd", + "mpa_rep_sent", + "fpdu_mode", + "aborting", + "closing", + "moribund", + "dead", + NULL, +}; + +static int dack_mode = 1; +module_param(dack_mode, int, 0644); +MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)"); + +int c4iw_max_read_depth = 8; +module_param(c4iw_max_read_depth, int, 0644); +MODULE_PARM_DESC(c4iw_max_read_depth, "Per-connection max ORD/IRD (default=8)"); + +static int enable_tcp_timestamps; +module_param(enable_tcp_timestamps, int, 0644); +MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)"); + +static int enable_tcp_sack; +module_param(enable_tcp_sack, int, 0644); +MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)"); + +static int enable_tcp_window_scaling = 1; +module_param(enable_tcp_window_scaling, int, 0644); +MODULE_PARM_DESC(enable_tcp_window_scaling, + "Enable tcp window scaling (default=1)"); + +int c4iw_debug; +module_param(c4iw_debug, int, 0644); +MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)"); + +static int peer2peer; +module_param(peer2peer, int, 0644); +MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); + +static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; +module_param(p2p_type, int, 0644); +MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: " + "1=RDMA_READ 0=RDMA_WRITE (default 1)"); + +static int ep_timeout_secs = 60; +module_param(ep_timeout_secs, int, 0644); +MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " + "in seconds (default=60)"); + +static int mpa_rev = 1; +module_param(mpa_rev, int, 0644); +MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " + "1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft" + " compliant (default=1)"); + +static int markers_enabled; +module_param(markers_enabled, int, 0644); +MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)"); + +static int crc_enabled = 1; +module_param(crc_enabled, int, 0644); +MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)"); + +static int rcv_win = 256 * 1024; +module_param(rcv_win, int, 0644); +MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)"); + +static int snd_win = 128 * 1024; +module_param(snd_win, int, 0644); +MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)"); + +static struct workqueue_struct *workq; + +static struct sk_buff_head rxq; + +static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); +static void ep_timeout(unsigned long arg); +static void connect_reply_upcall(struct c4iw_ep *ep, int status); + +static LIST_HEAD(timeout_list); +static spinlock_t timeout_lock; + +static void start_ep_timer(struct c4iw_ep *ep) +{ + PDBG("%s ep %p\n", __func__, ep); + if (timer_pending(&ep->timer)) { + PDBG("%s stopped / restarted timer ep %p\n", __func__, ep); + del_timer_sync(&ep->timer); + } else + c4iw_get_ep(&ep->com); + ep->timer.expires = jiffies + ep_timeout_secs * HZ; + ep->timer.data = (unsigned long)ep; + ep->timer.function = ep_timeout; + add_timer(&ep->timer); +} + +static void stop_ep_timer(struct c4iw_ep *ep) +{ + PDBG("%s ep %p\n", __func__, ep); + if (!timer_pending(&ep->timer)) { + printk(KERN_ERR "%s timer stopped when its not running! " + "ep %p state %u\n", __func__, ep, ep->com.state); + WARN_ON(1); + return; + } + del_timer_sync(&ep->timer); + c4iw_put_ep(&ep->com); +} + +static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, + struct l2t_entry *l2e) +{ + int error = 0; + + if (c4iw_fatal_error(rdev)) { + kfree_skb(skb); + PDBG("%s - device in error state - dropping\n", __func__); + return -EIO; + } + error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e); + if (error < 0) + kfree_skb(skb); + return error < 0 ? error : 0; +} + +int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb) +{ + int error = 0; + + if (c4iw_fatal_error(rdev)) { + kfree_skb(skb); + PDBG("%s - device in error state - dropping\n", __func__); + return -EIO; + } + error = cxgb4_ofld_send(rdev->lldi.ports[0], skb); + if (error < 0) + kfree_skb(skb); + return error < 0 ? error : 0; +} + +static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) +{ + struct cpl_tid_release *req; + + skb = get_skb(skb, sizeof *req, GFP_KERNEL); + if (!skb) + return; + req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req)); + INIT_TP_WR(req, hwtid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); + set_wr_txq(skb, CPL_PRIORITY_SETUP, 0); + c4iw_ofld_send(rdev, skb); + return; +} + +static void set_emss(struct c4iw_ep *ep, u16 opt) +{ + ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - 40; + ep->mss = ep->emss; + if (GET_TCPOPT_TSTAMP(opt)) + ep->emss -= 12; + if (ep->emss < 128) + ep->emss = 128; + PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, GET_TCPOPT_MSS(opt), + ep->mss, ep->emss); +} + +static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc) +{ + enum c4iw_ep_state state; + + mutex_lock(&epc->mutex); + state = epc->state; + mutex_unlock(&epc->mutex); + return state; +} + +static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) +{ + epc->state = new; +} + +static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) +{ + mutex_lock(&epc->mutex); + PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]); + __state_set(epc, new); + mutex_unlock(&epc->mutex); + return; +} + +static void *alloc_ep(int size, gfp_t gfp) +{ + struct c4iw_ep_common *epc; + + epc = kzalloc(size, gfp); + if (epc) { + kref_init(&epc->kref); + mutex_init(&epc->mutex); + c4iw_init_wr_wait(&epc->wr_wait); + } + PDBG("%s alloc ep %p\n", __func__, epc); + return epc; +} + +void _c4iw_free_ep(struct kref *kref) +{ + struct c4iw_ep *ep; + + ep = container_of(kref, struct c4iw_ep, com.kref); + PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]); + if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { + cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + } + kfree(ep); +} + +static void release_ep_resources(struct c4iw_ep *ep) +{ + set_bit(RELEASE_RESOURCES, &ep->com.flags); + c4iw_put_ep(&ep->com); +} + +static int status2errno(int status) +{ + switch (status) { + case CPL_ERR_NONE: + return 0; + case CPL_ERR_CONN_RESET: + return -ECONNRESET; + case CPL_ERR_ARP_MISS: + return -EHOSTUNREACH; + case CPL_ERR_CONN_TIMEDOUT: + return -ETIMEDOUT; + case CPL_ERR_TCAM_FULL: + return -ENOMEM; + case CPL_ERR_CONN_EXIST: + return -EADDRINUSE; + default: + return -EIO; + } +} + +/* + * Try and reuse skbs already allocated... + */ +static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) +{ + if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) { + skb_trim(skb, 0); + skb_get(skb); + skb_reset_transport_header(skb); + } else { + skb = alloc_skb(len, gfp); + } + return skb; +} + +static struct rtable *find_route(struct c4iw_dev *dev, __be32 local_ip, + __be32 peer_ip, __be16 local_port, + __be16 peer_port, u8 tos) +{ + struct rtable *rt; + struct flowi4 fl4; + + rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, + peer_port, local_port, IPPROTO_TCP, + tos, 0); + if (IS_ERR(rt)) + return NULL; + return rt; +} + +static void arp_failure_discard(void *handle, struct sk_buff *skb) +{ + PDBG("%s c4iw_dev %p\n", __func__, handle); + kfree_skb(skb); +} + +/* + * Handle an ARP failure for an active open. + */ +static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) +{ + printk(KERN_ERR MOD "ARP failure duing connect\n"); + kfree_skb(skb); +} + +/* + * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant + * and send it along. + */ +static void abort_arp_failure(void *handle, struct sk_buff *skb) +{ + struct c4iw_rdev *rdev = handle; + struct cpl_abort_req *req = cplhdr(skb); + + PDBG("%s rdev %p\n", __func__, rdev); + req->cmd = CPL_ABORT_NO_RST; + c4iw_ofld_send(rdev, skb); +} + +static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) +{ + unsigned int flowclen = 80; + struct fw_flowc_wr *flowc; + int i; + + skb = get_skb(skb, flowclen, GFP_KERNEL); + flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen); + + flowc->op_to_nparams = cpu_to_be32(FW_WR_OP(FW_FLOWC_WR) | + FW_FLOWC_WR_NPARAMS(8)); + flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(flowclen, + 16)) | FW_WR_FLOWID(ep->hwtid)); + + flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; + flowc->mnemval[0].val = cpu_to_be32(PCI_FUNC(ep->com.dev->rdev.lldi.pdev->devfn) << 8); + flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; + flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan); + flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; + flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan); + flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; + flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid); + flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT; + flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq); + flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT; + flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq); + flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF; + flowc->mnemval[6].val = cpu_to_be32(snd_win); + flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; + flowc->mnemval[7].val = cpu_to_be32(ep->emss); + /* Pad WR to 16 byte boundary */ + flowc->mnemval[8].mnemonic = 0; + flowc->mnemval[8].val = 0; + for (i = 0; i < 9; i++) { + flowc->mnemval[i].r4[0] = 0; + flowc->mnemval[i].r4[1] = 0; + flowc->mnemval[i].r4[2] = 0; + } + + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + c4iw_ofld_send(&ep->com.dev->rdev, skb); +} + +static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp) +{ + struct cpl_close_con_req *req; + struct sk_buff *skb; + int wrlen = roundup(sizeof *req, 16); + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + skb = get_skb(NULL, wrlen, gfp); + if (!skb) { + printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + t4_set_arp_err_handler(skb, NULL, arp_failure_discard); + req = (struct cpl_close_con_req *) skb_put(skb, wrlen); + memset(req, 0, wrlen); + INIT_TP_WR(req, ep->hwtid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, + ep->hwtid)); + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) +{ + struct cpl_abort_req *req; + int wrlen = roundup(sizeof *req, 16); + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + skb = get_skb(skb, wrlen, gfp); + if (!skb) { + printk(KERN_ERR MOD "%s - failed to alloc skb.\n", + __func__); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + t4_set_arp_err_handler(skb, &ep->com.dev->rdev, abort_arp_failure); + req = (struct cpl_abort_req *) skb_put(skb, wrlen); + memset(req, 0, wrlen); + INIT_TP_WR(req, ep->hwtid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); + req->cmd = CPL_ABORT_SEND_RST; + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +static int send_connect(struct c4iw_ep *ep) +{ + struct cpl_act_open_req *req; + struct sk_buff *skb; + u64 opt0; + u32 opt2; + unsigned int mtu_idx; + int wscale; + int wrlen = roundup(sizeof *req, 16); + + PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid); + + skb = get_skb(NULL, wrlen, GFP_KERNEL); + if (!skb) { + printk(KERN_ERR MOD "%s - failed to alloc skb.\n", + __func__); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); + + cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); + wscale = compute_wscale(rcv_win); + opt0 = KEEP_ALIVE(1) | + DELACK(1) | + WND_SCALE(wscale) | + MSS_IDX(mtu_idx) | + L2T_IDX(ep->l2t->idx) | + TX_CHAN(ep->tx_chan) | + SMAC_SEL(ep->smac_idx) | + DSCP(ep->tos) | + ULP_MODE(ULP_MODE_TCPDDP) | + RCV_BUFSIZ(rcv_win>>10); + opt2 = RX_CHANNEL(0) | + RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); + if (enable_tcp_timestamps) + opt2 |= TSTAMPS_EN(1); + if (enable_tcp_sack) + opt2 |= SACK_EN(1); + if (wscale && enable_tcp_window_scaling) + opt2 |= WND_SCALE_EN(1); + t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure); + + req = (struct cpl_act_open_req *) skb_put(skb, wrlen); + INIT_TP_WR(req, 0); + OPCODE_TID(req) = cpu_to_be32( + MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ((ep->rss_qid<<14)|ep->atid))); + req->local_port = ep->com.local_addr.sin_port; + req->peer_port = ep->com.remote_addr.sin_port; + req->local_ip = ep->com.local_addr.sin_addr.s_addr; + req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; + req->opt0 = cpu_to_be64(opt0); + req->params = 0; + req->opt2 = cpu_to_be32(opt2); + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, + u8 mpa_rev_to_use) +{ + int mpalen, wrlen; + struct fw_ofld_tx_data_wr *req; + struct mpa_message *mpa; + struct mpa_v2_conn_params mpa_v2_params; + + PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen); + + BUG_ON(skb_cloned(skb)); + + mpalen = sizeof(*mpa) + ep->plen; + if (mpa_rev_to_use == 2) + mpalen += sizeof(struct mpa_v2_conn_params); + wrlen = roundup(mpalen + sizeof *req, 16); + skb = get_skb(skb, wrlen, GFP_KERNEL); + if (!skb) { + connect_reply_upcall(ep, -ENOMEM); + return; + } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + + req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen); + memset(req, 0, wrlen); + req->op_to_immdlen = cpu_to_be32( + FW_WR_OP(FW_OFLD_TX_DATA_WR) | + FW_WR_COMPL(1) | + FW_WR_IMMDLEN(mpalen)); + req->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID(ep->hwtid) | + FW_WR_LEN16(wrlen >> 4)); + req->plen = cpu_to_be32(mpalen); + req->tunnel_to_proxy = cpu_to_be32( + FW_OFLD_TX_DATA_WR_FLUSH(1) | + FW_OFLD_TX_DATA_WR_SHOVE(1)); + + mpa = (struct mpa_message *)(req + 1); + memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); + mpa->flags = (crc_enabled ? MPA_CRC : 0) | + (markers_enabled ? MPA_MARKERS : 0) | + (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0); + mpa->private_data_size = htons(ep->plen); + mpa->revision = mpa_rev_to_use; + if (mpa_rev_to_use == 1) { + ep->tried_with_mpa_v1 = 1; + ep->retry_with_mpa_v1 = 0; + } + + if (mpa_rev_to_use == 2) { + mpa->private_data_size += + htons(sizeof(struct mpa_v2_conn_params)); + mpa_v2_params.ird = htons((u16)ep->ird); + mpa_v2_params.ord = htons((u16)ep->ord); + + if (peer2peer) { + mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); + if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_WRITE_RTR); + else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_READ_RTR); + } + memcpy(mpa->private_data, &mpa_v2_params, + sizeof(struct mpa_v2_conn_params)); + + if (ep->plen) + memcpy(mpa->private_data + + sizeof(struct mpa_v2_conn_params), + ep->mpa_pkt + sizeof(*mpa), ep->plen); + } else + if (ep->plen) + memcpy(mpa->private_data, + ep->mpa_pkt + sizeof(*mpa), ep->plen); + + /* + * Reference the mpa skb. This ensures the data area + * will remain in memory until the hw acks the tx. + * Function fw4_ack() will deref it. + */ + skb_get(skb); + t4_set_arp_err_handler(skb, NULL, arp_failure_discard); + BUG_ON(ep->mpa_skb); + ep->mpa_skb = skb; + c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); + start_ep_timer(ep); + state_set(&ep->com, MPA_REQ_SENT); + ep->mpa_attr.initiator = 1; + return; +} + +static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) +{ + int mpalen, wrlen; + struct fw_ofld_tx_data_wr *req; + struct mpa_message *mpa; + struct sk_buff *skb; + struct mpa_v2_conn_params mpa_v2_params; + + PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen); + + mpalen = sizeof(*mpa) + plen; + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) + mpalen += sizeof(struct mpa_v2_conn_params); + wrlen = roundup(mpalen + sizeof *req, 16); + + skb = get_skb(NULL, wrlen, GFP_KERNEL); + if (!skb) { + printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + + req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen); + memset(req, 0, wrlen); + req->op_to_immdlen = cpu_to_be32( + FW_WR_OP(FW_OFLD_TX_DATA_WR) | + FW_WR_COMPL(1) | + FW_WR_IMMDLEN(mpalen)); + req->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID(ep->hwtid) | + FW_WR_LEN16(wrlen >> 4)); + req->plen = cpu_to_be32(mpalen); + req->tunnel_to_proxy = cpu_to_be32( + FW_OFLD_TX_DATA_WR_FLUSH(1) | + FW_OFLD_TX_DATA_WR_SHOVE(1)); + + mpa = (struct mpa_message *)(req + 1); + memset(mpa, 0, sizeof(*mpa)); + memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); + mpa->flags = MPA_REJECT; + mpa->revision = mpa_rev; + mpa->private_data_size = htons(plen); + + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { + mpa->flags |= MPA_ENHANCED_RDMA_CONN; + mpa->private_data_size += + htons(sizeof(struct mpa_v2_conn_params)); + mpa_v2_params.ird = htons(((u16)ep->ird) | + (peer2peer ? MPA_V2_PEER2PEER_MODEL : + 0)); + mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ? + (p2p_type == + FW_RI_INIT_P2PTYPE_RDMA_WRITE ? + MPA_V2_RDMA_WRITE_RTR : p2p_type == + FW_RI_INIT_P2PTYPE_READ_REQ ? + MPA_V2_RDMA_READ_RTR : 0) : 0)); + memcpy(mpa->private_data, &mpa_v2_params, + sizeof(struct mpa_v2_conn_params)); + + if (ep->plen) + memcpy(mpa->private_data + + sizeof(struct mpa_v2_conn_params), pdata, plen); + } else + if (plen) + memcpy(mpa->private_data, pdata, plen); + + /* + * Reference the mpa skb again. This ensures the data area + * will remain in memory until the hw acks the tx. + * Function fw4_ack() will deref it. + */ + skb_get(skb); + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + t4_set_arp_err_handler(skb, NULL, arp_failure_discard); + BUG_ON(ep->mpa_skb); + ep->mpa_skb = skb; + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) +{ + int mpalen, wrlen; + struct fw_ofld_tx_data_wr *req; + struct mpa_message *mpa; + struct sk_buff *skb; + struct mpa_v2_conn_params mpa_v2_params; + + PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen); + + mpalen = sizeof(*mpa) + plen; + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) + mpalen += sizeof(struct mpa_v2_conn_params); + wrlen = roundup(mpalen + sizeof *req, 16); + + skb = get_skb(NULL, wrlen, GFP_KERNEL); + if (!skb) { + printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + + req = (struct fw_ofld_tx_data_wr *) skb_put(skb, wrlen); + memset(req, 0, wrlen); + req->op_to_immdlen = cpu_to_be32( + FW_WR_OP(FW_OFLD_TX_DATA_WR) | + FW_WR_COMPL(1) | + FW_WR_IMMDLEN(mpalen)); + req->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID(ep->hwtid) | + FW_WR_LEN16(wrlen >> 4)); + req->plen = cpu_to_be32(mpalen); + req->tunnel_to_proxy = cpu_to_be32( + FW_OFLD_TX_DATA_WR_FLUSH(1) | + FW_OFLD_TX_DATA_WR_SHOVE(1)); + + mpa = (struct mpa_message *)(req + 1); + memset(mpa, 0, sizeof(*mpa)); + memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); + mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | + (markers_enabled ? MPA_MARKERS : 0); + mpa->revision = ep->mpa_attr.version; + mpa->private_data_size = htons(plen); + + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { + mpa->flags |= MPA_ENHANCED_RDMA_CONN; + mpa->private_data_size += + htons(sizeof(struct mpa_v2_conn_params)); + mpa_v2_params.ird = htons((u16)ep->ird); + mpa_v2_params.ord = htons((u16)ep->ord); + if (peer2peer && (ep->mpa_attr.p2p_type != + FW_RI_INIT_P2PTYPE_DISABLED)) { + mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); + + if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_WRITE_RTR); + else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_READ_RTR); + } + + memcpy(mpa->private_data, &mpa_v2_params, + sizeof(struct mpa_v2_conn_params)); + + if (ep->plen) + memcpy(mpa->private_data + + sizeof(struct mpa_v2_conn_params), pdata, plen); + } else + if (plen) + memcpy(mpa->private_data, pdata, plen); + + /* + * Reference the mpa skb. This ensures the data area + * will remain in memory until the hw acks the tx. + * Function fw4_ack() will deref it. + */ + skb_get(skb); + t4_set_arp_err_handler(skb, NULL, arp_failure_discard); + ep->mpa_skb = skb; + state_set(&ep->com, MPA_REP_SENT); + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_act_establish *req = cplhdr(skb); + unsigned int tid = GET_TID(req); + unsigned int atid = GET_TID_TID(ntohl(req->tos_atid)); + struct tid_info *t = dev->rdev.lldi.tids; + + ep = lookup_atid(t, atid); + + PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid, + be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn)); + + dst_confirm(ep->dst); + + /* setup the hwtid for this connection */ + ep->hwtid = tid; + cxgb4_insert_tid(t, ep, tid); + + ep->snd_seq = be32_to_cpu(req->snd_isn); + ep->rcv_seq = be32_to_cpu(req->rcv_isn); + + set_emss(ep, ntohs(req->tcp_opt)); + + /* dealloc the atid */ + cxgb4_free_atid(t, atid); + + /* start MPA negotiation */ + send_flowc(ep, NULL); + if (ep->retry_with_mpa_v1) + send_mpa_req(ep, skb, 1); + else + send_mpa_req(ep, skb, mpa_rev); + + return 0; +} + +static void close_complete_upcall(struct c4iw_ep *ep) +{ + struct iw_cm_event event; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CLOSE; + if (ep->com.cm_id) { + PDBG("close complete delivered ep %p cm_id %p tid %u\n", + ep, ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + ep->com.qp = NULL; + } +} + +static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) +{ + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + close_complete_upcall(ep); + state_set(&ep->com, ABORTING); + return send_abort(ep, skb, gfp); +} + +static void peer_close_upcall(struct c4iw_ep *ep) +{ + struct iw_cm_event event; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_DISCONNECT; + if (ep->com.cm_id) { + PDBG("peer close delivered ep %p cm_id %p tid %u\n", + ep, ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + } +} + +static void peer_abort_upcall(struct c4iw_ep *ep) +{ + struct iw_cm_event event; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CLOSE; + event.status = -ECONNRESET; + if (ep->com.cm_id) { + PDBG("abort delivered ep %p cm_id %p tid %u\n", ep, + ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + ep->com.qp = NULL; + } +} + +static void connect_reply_upcall(struct c4iw_ep *ep, int status) +{ + struct iw_cm_event event; + + PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CONNECT_REPLY; + event.status = status; + event.local_addr = ep->com.local_addr; + event.remote_addr = ep->com.remote_addr; + + if ((status == 0) || (status == -ECONNREFUSED)) { + if (!ep->tried_with_mpa_v1) { + /* this means MPA_v2 is used */ + event.private_data_len = ep->plen - + sizeof(struct mpa_v2_conn_params); + event.private_data = ep->mpa_pkt + + sizeof(struct mpa_message) + + sizeof(struct mpa_v2_conn_params); + } else { + /* this means MPA_v1 is used */ + event.private_data_len = ep->plen; + event.private_data = ep->mpa_pkt + + sizeof(struct mpa_message); + } + } + + PDBG("%s ep %p tid %u status %d\n", __func__, ep, + ep->hwtid, status); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + + if (status < 0) { + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + ep->com.qp = NULL; + } +} + +static void connect_request_upcall(struct c4iw_ep *ep) +{ + struct iw_cm_event event; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CONNECT_REQUEST; + event.local_addr = ep->com.local_addr; + event.remote_addr = ep->com.remote_addr; + event.provider_data = ep; + if (!ep->tried_with_mpa_v1) { + /* this means MPA_v2 is used */ + event.ord = ep->ord; + event.ird = ep->ird; + event.private_data_len = ep->plen - + sizeof(struct mpa_v2_conn_params); + event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) + + sizeof(struct mpa_v2_conn_params); + } else { + /* this means MPA_v1 is used. Send max supported */ + event.ord = c4iw_max_read_depth; + event.ird = c4iw_max_read_depth; + event.private_data_len = ep->plen; + event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); + } + if (state_read(&ep->parent_ep->com) != DEAD) { + c4iw_get_ep(&ep->com); + ep->parent_ep->com.cm_id->event_handler( + ep->parent_ep->com.cm_id, + &event); + } + c4iw_put_ep(&ep->parent_ep->com); + ep->parent_ep = NULL; +} + +static void established_upcall(struct c4iw_ep *ep) +{ + struct iw_cm_event event; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_ESTABLISHED; + event.ird = ep->ird; + event.ord = ep->ord; + if (ep->com.cm_id) { + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + } +} + +static int update_rx_credits(struct c4iw_ep *ep, u32 credits) +{ + struct cpl_rx_data_ack *req; + struct sk_buff *skb; + int wrlen = roundup(sizeof *req, 16); + + PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits); + skb = get_skb(NULL, wrlen, GFP_KERNEL); + if (!skb) { + printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n"); + return 0; + } + + req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen); + memset(req, 0, wrlen); + INIT_TP_WR(req, ep->hwtid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, + ep->hwtid)); + req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK(1) | + F_RX_DACK_CHANGE | + V_RX_DACK_MODE(dack_mode)); + set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx); + c4iw_ofld_send(&ep->com.dev->rdev, skb); + return credits; +} + +static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) +{ + struct mpa_message *mpa; + struct mpa_v2_conn_params *mpa_v2_params; + u16 plen; + u16 resp_ird, resp_ord; + u8 rtr_mismatch = 0, insuff_ird = 0; + struct c4iw_qp_attributes attrs; + enum c4iw_qp_attr_mask mask; + int err; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + + /* + * Stop mpa timer. If it expired, then the state has + * changed and we bail since ep_timeout already aborted + * the connection. + */ + stop_ep_timer(ep); + if (state_read(&ep->com) != MPA_REQ_SENT) + return; + + /* + * If we get more than the supported amount of private data + * then we must fail this connection. + */ + if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { + err = -EINVAL; + goto err; + } + + /* + * copy the new data into our accumulation buffer. + */ + skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), + skb->len); + ep->mpa_pkt_len += skb->len; + + /* + * if we don't even have the mpa message, then bail. + */ + if (ep->mpa_pkt_len < sizeof(*mpa)) + return; + mpa = (struct mpa_message *) ep->mpa_pkt; + + /* Validate MPA header. */ + if (mpa->revision > mpa_rev) { + printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," + " Received = %d\n", __func__, mpa_rev, mpa->revision); + err = -EPROTO; + goto err; + } + if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { + err = -EPROTO; + goto err; + } + + plen = ntohs(mpa->private_data_size); + + /* + * Fail if there's too much private data. + */ + if (plen > MPA_MAX_PRIVATE_DATA) { + err = -EPROTO; + goto err; + } + + /* + * If plen does not account for pkt size + */ + if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { + err = -EPROTO; + goto err; + } + + ep->plen = (u8) plen; + + /* + * If we don't have all the pdata yet, then bail. + * We'll continue process when more data arrives. + */ + if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) + return; + + if (mpa->flags & MPA_REJECT) { + err = -ECONNREFUSED; + goto err; + } + + /* + * If we get here we have accumulated the entire mpa + * start reply message including private data. And + * the MPA header is valid. + */ + state_set(&ep->com, FPDU_MODE); + ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; + ep->mpa_attr.recv_marker_enabled = markers_enabled; + ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; + ep->mpa_attr.version = mpa->revision; + ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; + + if (mpa->revision == 2) { + ep->mpa_attr.enhanced_rdma_conn = + mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; + if (ep->mpa_attr.enhanced_rdma_conn) { + mpa_v2_params = (struct mpa_v2_conn_params *) + (ep->mpa_pkt + sizeof(*mpa)); + resp_ird = ntohs(mpa_v2_params->ird) & + MPA_V2_IRD_ORD_MASK; + resp_ord = ntohs(mpa_v2_params->ord) & + MPA_V2_IRD_ORD_MASK; + + /* + * This is a double-check. Ideally, below checks are + * not required since ird/ord stuff has been taken + * care of in c4iw_accept_cr + */ + if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) { + err = -ENOMEM; + ep->ird = resp_ord; + ep->ord = resp_ird; + insuff_ird = 1; + } + + if (ntohs(mpa_v2_params->ird) & + MPA_V2_PEER2PEER_MODEL) { + if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_WRITE_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_RDMA_WRITE; + else if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_READ_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_READ_REQ; + } + } + } else if (mpa->revision == 1) + if (peer2peer) + ep->mpa_attr.p2p_type = p2p_type; + + PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, " + "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = " + "%d\n", __func__, ep->mpa_attr.crc_enabled, + ep->mpa_attr.recv_marker_enabled, + ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, + ep->mpa_attr.p2p_type, p2p_type); + + /* + * If responder's RTR does not match with that of initiator, assign + * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not + * generated when moving QP to RTS state. + * A TERM message will be sent after QP has moved to RTS state + */ + if ((ep->mpa_attr.version == 2) && peer2peer && + (ep->mpa_attr.p2p_type != p2p_type)) { + ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; + rtr_mismatch = 1; + } + + attrs.mpa_attr = ep->mpa_attr; + attrs.max_ird = ep->ird; + attrs.max_ord = ep->ord; + attrs.llp_stream_handle = ep; + attrs.next_state = C4IW_QP_STATE_RTS; + + mask = C4IW_QP_ATTR_NEXT_STATE | + C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR | + C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD; + + /* bind QP and TID with INIT_WR */ + err = c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, mask, &attrs, 1); + if (err) + goto err; + + /* + * If responder's RTR requirement did not match with what initiator + * supports, generate TERM message + */ + if (rtr_mismatch) { + printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__); + attrs.layer_etype = LAYER_MPA | DDP_LLP; + attrs.ecode = MPA_NOMATCH_RTR; + attrs.next_state = C4IW_QP_STATE_TERMINATE; + err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + err = -ENOMEM; + goto out; + } + + /* + * Generate TERM if initiator IRD is not sufficient for responder + * provided ORD. Currently, we do the same behaviour even when + * responder provided IRD is also not sufficient as regards to + * initiator ORD. + */ + if (insuff_ird) { + printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n", + __func__); + attrs.layer_etype = LAYER_MPA | DDP_LLP; + attrs.ecode = MPA_INSUFF_IRD; + attrs.next_state = C4IW_QP_STATE_TERMINATE; + err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + err = -ENOMEM; + goto out; + } + goto out; +err: + state_set(&ep->com, ABORTING); + send_abort(ep, skb, GFP_KERNEL); +out: + connect_reply_upcall(ep, err); + return; +} + +static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) +{ + struct mpa_message *mpa; + struct mpa_v2_conn_params *mpa_v2_params; + u16 plen; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + + if (state_read(&ep->com) != MPA_REQ_WAIT) + return; + + /* + * If we get more than the supported amount of private data + * then we must fail this connection. + */ + if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { + stop_ep_timer(ep); + abort_connection(ep, skb, GFP_KERNEL); + return; + } + + PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); + + /* + * Copy the new data into our accumulation buffer. + */ + skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), + skb->len); + ep->mpa_pkt_len += skb->len; + + /* + * If we don't even have the mpa message, then bail. + * We'll continue process when more data arrives. + */ + if (ep->mpa_pkt_len < sizeof(*mpa)) + return; + + PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); + stop_ep_timer(ep); + mpa = (struct mpa_message *) ep->mpa_pkt; + + /* + * Validate MPA Header. + */ + if (mpa->revision > mpa_rev) { + printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," + " Received = %d\n", __func__, mpa_rev, mpa->revision); + abort_connection(ep, skb, GFP_KERNEL); + return; + } + + if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { + abort_connection(ep, skb, GFP_KERNEL); + return; + } + + plen = ntohs(mpa->private_data_size); + + /* + * Fail if there's too much private data. + */ + if (plen > MPA_MAX_PRIVATE_DATA) { + abort_connection(ep, skb, GFP_KERNEL); + return; + } + + /* + * If plen does not account for pkt size + */ + if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { + abort_connection(ep, skb, GFP_KERNEL); + return; + } + ep->plen = (u8) plen; + + /* + * If we don't have all the pdata yet, then bail. + */ + if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) + return; + + /* + * If we get here we have accumulated the entire mpa + * start reply message including private data. + */ + ep->mpa_attr.initiator = 0; + ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; + ep->mpa_attr.recv_marker_enabled = markers_enabled; + ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; + ep->mpa_attr.version = mpa->revision; + if (mpa->revision == 1) + ep->tried_with_mpa_v1 = 1; + ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; + + if (mpa->revision == 2) { + ep->mpa_attr.enhanced_rdma_conn = + mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; + if (ep->mpa_attr.enhanced_rdma_conn) { + mpa_v2_params = (struct mpa_v2_conn_params *) + (ep->mpa_pkt + sizeof(*mpa)); + ep->ird = ntohs(mpa_v2_params->ird) & + MPA_V2_IRD_ORD_MASK; + ep->ord = ntohs(mpa_v2_params->ord) & + MPA_V2_IRD_ORD_MASK; + if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) + if (peer2peer) { + if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_WRITE_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_RDMA_WRITE; + else if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_READ_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_READ_REQ; + } + } + } else if (mpa->revision == 1) + if (peer2peer) + ep->mpa_attr.p2p_type = p2p_type; + + PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, " + "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__, + ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, + ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, + ep->mpa_attr.p2p_type); + + state_set(&ep->com, MPA_REQ_RCVD); + + /* drive upcall */ + connect_request_upcall(ep); + return; +} + +static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_rx_data *hdr = cplhdr(skb); + unsigned int dlen = ntohs(hdr->len); + unsigned int tid = GET_TID(hdr); + struct tid_info *t = dev->rdev.lldi.tids; + + ep = lookup_tid(t, tid); + PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); + skb_pull(skb, sizeof(*hdr)); + skb_trim(skb, dlen); + + ep->rcv_seq += dlen; + BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen)); + + /* update RX credits */ + update_rx_credits(ep, dlen); + + switch (state_read(&ep->com)) { + case MPA_REQ_SENT: + process_mpa_reply(ep, skb); + break; + case MPA_REQ_WAIT: + process_mpa_request(ep, skb); + break; + case MPA_REP_SENT: + break; + default: + printk(KERN_ERR MOD "%s Unexpected streaming data." + " ep %p state %d tid %u\n", + __func__, ep, state_read(&ep->com), ep->hwtid); + + /* + * The ep will timeout and inform the ULP of the failure. + * See ep_timeout(). + */ + break; + } + return 0; +} + +static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_abort_rpl_rss *rpl = cplhdr(skb); + int release = 0; + unsigned int tid = GET_TID(rpl); + struct tid_info *t = dev->rdev.lldi.tids; + + ep = lookup_tid(t, tid); + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + BUG_ON(!ep); + mutex_lock(&ep->com.mutex); + switch (ep->com.state) { + case ABORTING: + __state_set(&ep->com, DEAD); + release = 1; + break; + default: + printk(KERN_ERR "%s ep %p state %d\n", + __func__, ep, ep->com.state); + break; + } + mutex_unlock(&ep->com.mutex); + + if (release) + release_ep_resources(ep); + return 0; +} + +/* + * Return whether a failed active open has allocated a TID + */ +static inline int act_open_has_tid(int status) +{ + return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && + status != CPL_ERR_ARP_MISS; +} + +static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_act_open_rpl *rpl = cplhdr(skb); + unsigned int atid = GET_TID_TID(GET_AOPEN_ATID( + ntohl(rpl->atid_status))); + struct tid_info *t = dev->rdev.lldi.tids; + int status = GET_AOPEN_STATUS(ntohl(rpl->atid_status)); + + ep = lookup_atid(t, atid); + + PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, + status, status2errno(status)); + + if (status == CPL_ERR_RTX_NEG_ADVICE) { + printk(KERN_WARNING MOD "Connection problems for atid %u\n", + atid); + return 0; + } + + connect_reply_upcall(ep, status2errno(status)); + state_set(&ep->com, DEAD); + + if (status && act_open_has_tid(status)) + cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl)); + + cxgb4_free_atid(t, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_put_ep(&ep->com); + + return 0; +} + +static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_pass_open_rpl *rpl = cplhdr(skb); + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int stid = GET_TID(rpl); + struct c4iw_listen_ep *ep = lookup_stid(t, stid); + + if (!ep) { + printk(KERN_ERR MOD "stid %d lookup failure!\n", stid); + return 0; + } + PDBG("%s ep %p status %d error %d\n", __func__, ep, + rpl->status, status2errno(rpl->status)); + c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); + + return 0; +} + +static int listen_stop(struct c4iw_listen_ep *ep) +{ + struct sk_buff *skb; + struct cpl_close_listsvr_req *req; + + PDBG("%s ep %p\n", __func__, ep); + skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + if (!skb) { + printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__); + return -ENOMEM; + } + req = (struct cpl_close_listsvr_req *) skb_put(skb, sizeof(*req)); + INIT_TP_WR(req, 0); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, + ep->stid)); + req->reply_ctrl = cpu_to_be16( + QUEUENO(ep->com.dev->rdev.lldi.rxq_ids[0])); + set_wr_txq(skb, CPL_PRIORITY_SETUP, 0); + return c4iw_ofld_send(&ep->com.dev->rdev, skb); +} + +static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_close_listsvr_rpl *rpl = cplhdr(skb); + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int stid = GET_TID(rpl); + struct c4iw_listen_ep *ep = lookup_stid(t, stid); + + PDBG("%s ep %p\n", __func__, ep); + c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); + return 0; +} + +static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, + struct cpl_pass_accept_req *req) +{ + struct cpl_pass_accept_rpl *rpl; + unsigned int mtu_idx; + u64 opt0; + u32 opt2; + int wscale; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + BUG_ON(skb_cloned(skb)); + skb_trim(skb, sizeof(*rpl)); + skb_get(skb); + cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); + wscale = compute_wscale(rcv_win); + opt0 = KEEP_ALIVE(1) | + DELACK(1) | + WND_SCALE(wscale) | + MSS_IDX(mtu_idx) | + L2T_IDX(ep->l2t->idx) | + TX_CHAN(ep->tx_chan) | + SMAC_SEL(ep->smac_idx) | + DSCP(ep->tos) | + ULP_MODE(ULP_MODE_TCPDDP) | + RCV_BUFSIZ(rcv_win>>10); + opt2 = RX_CHANNEL(0) | + RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); + + if (enable_tcp_timestamps && req->tcpopt.tstamp) + opt2 |= TSTAMPS_EN(1); + if (enable_tcp_sack && req->tcpopt.sack) + opt2 |= SACK_EN(1); + if (wscale && enable_tcp_window_scaling) + opt2 |= WND_SCALE_EN(1); + + rpl = cplhdr(skb); + INIT_TP_WR(rpl, ep->hwtid); + OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, + ep->hwtid)); + rpl->opt0 = cpu_to_be64(opt0); + rpl->opt2 = cpu_to_be32(opt2); + set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); + c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); + + return; +} + +static void reject_cr(struct c4iw_dev *dev, u32 hwtid, __be32 peer_ip, + struct sk_buff *skb) +{ + PDBG("%s c4iw_dev %p tid %u peer_ip %x\n", __func__, dev, hwtid, + peer_ip); + BUG_ON(skb_cloned(skb)); + skb_trim(skb, sizeof(struct cpl_tid_release)); + skb_get(skb); + release_tid(&dev->rdev, hwtid, skb); + return; +} + +static void get_4tuple(struct cpl_pass_accept_req *req, + __be32 *local_ip, __be32 *peer_ip, + __be16 *local_port, __be16 *peer_port) +{ + int eth_len = G_ETH_HDR_LEN(be32_to_cpu(req->hdr_len)); + int ip_len = G_IP_HDR_LEN(be32_to_cpu(req->hdr_len)); + struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); + struct tcphdr *tcp = (struct tcphdr *) + ((u8 *)(req + 1) + eth_len + ip_len); + + PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__, + ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source), + ntohs(tcp->dest)); + + *peer_ip = ip->saddr; + *local_ip = ip->daddr; + *peer_port = tcp->source; + *local_port = tcp->dest; + + return; +} + +static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst, + struct c4iw_dev *cdev, bool clear_mpa_v1) +{ + struct neighbour *n; + int err, step; + + n = dst_neigh_lookup(dst, &peer_ip); + if (!n) + return -ENODEV; + + rcu_read_lock(); + err = -ENOMEM; + if (n->dev->flags & IFF_LOOPBACK) { + struct net_device *pdev; + + pdev = ip_dev_find(&init_net, peer_ip); + ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, + n, pdev, 0); + if (!ep->l2t) + goto out; + ep->mtu = pdev->mtu; + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + step = cdev->rdev.lldi.ntxq / + cdev->rdev.lldi.nchan; + ep->txq_idx = cxgb4_port_idx(pdev) * step; + step = cdev->rdev.lldi.nrxq / + cdev->rdev.lldi.nchan; + ep->ctrlq_idx = cxgb4_port_idx(pdev); + ep->rss_qid = cdev->rdev.lldi.rxq_ids[ + cxgb4_port_idx(pdev) * step]; + dev_put(pdev); + } else { + ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, + n, n->dev, 0); + if (!ep->l2t) + goto out; + ep->mtu = dst_mtu(dst); + ep->tx_chan = cxgb4_port_chan(n->dev); + ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1; + step = cdev->rdev.lldi.ntxq / + cdev->rdev.lldi.nchan; + ep->txq_idx = cxgb4_port_idx(n->dev) * step; + ep->ctrlq_idx = cxgb4_port_idx(n->dev); + step = cdev->rdev.lldi.nrxq / + cdev->rdev.lldi.nchan; + ep->rss_qid = cdev->rdev.lldi.rxq_ids[ + cxgb4_port_idx(n->dev) * step]; + + if (clear_mpa_v1) { + ep->retry_with_mpa_v1 = 0; + ep->tried_with_mpa_v1 = 0; + } + } + err = 0; +out: + rcu_read_unlock(); + + neigh_release(n); + + return err; +} + +static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *child_ep, *parent_ep; + struct cpl_pass_accept_req *req = cplhdr(skb); + unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid)); + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int hwtid = GET_TID(req); + struct dst_entry *dst; + struct rtable *rt; + __be32 local_ip, peer_ip; + __be16 local_port, peer_port; + int err; + + parent_ep = lookup_stid(t, stid); + PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid); + + get_4tuple(req, &local_ip, &peer_ip, &local_port, &peer_port); + + if (state_read(&parent_ep->com) != LISTEN) { + printk(KERN_ERR "%s - listening ep not in LISTEN\n", + __func__); + goto reject; + } + + /* Find output route */ + rt = find_route(dev, local_ip, peer_ip, local_port, peer_port, + GET_POPEN_TOS(ntohl(req->tos_stid))); + if (!rt) { + printk(KERN_ERR MOD "%s - failed to find dst entry!\n", + __func__); + goto reject; + } + dst = &rt->dst; + + child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); + if (!child_ep) { + printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n", + __func__); + dst_release(dst); + goto reject; + } + + err = import_ep(child_ep, peer_ip, dst, dev, false); + if (err) { + printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", + __func__); + dst_release(dst); + kfree(child_ep); + goto reject; + } + + state_set(&child_ep->com, CONNECTING); + child_ep->com.dev = dev; + child_ep->com.cm_id = NULL; + child_ep->com.local_addr.sin_family = PF_INET; + child_ep->com.local_addr.sin_port = local_port; + child_ep->com.local_addr.sin_addr.s_addr = local_ip; + child_ep->com.remote_addr.sin_family = PF_INET; + child_ep->com.remote_addr.sin_port = peer_port; + child_ep->com.remote_addr.sin_addr.s_addr = peer_ip; + c4iw_get_ep(&parent_ep->com); + child_ep->parent_ep = parent_ep; + child_ep->tos = GET_POPEN_TOS(ntohl(req->tos_stid)); + child_ep->dst = dst; + child_ep->hwtid = hwtid; + + PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__, + child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid); + + init_timer(&child_ep->timer); + cxgb4_insert_tid(t, child_ep, hwtid); + accept_cr(child_ep, peer_ip, skb, req); + goto out; +reject: + reject_cr(dev, hwtid, peer_ip, skb); +out: + return 0; +} + +static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_pass_establish *req = cplhdr(skb); + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int tid = GET_TID(req); + + ep = lookup_tid(t, tid); + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + ep->snd_seq = be32_to_cpu(req->snd_isn); + ep->rcv_seq = be32_to_cpu(req->rcv_isn); + + set_emss(ep, ntohs(req->tcp_opt)); + + dst_confirm(ep->dst); + state_set(&ep->com, MPA_REQ_WAIT); + start_ep_timer(ep); + send_flowc(ep, skb); + + return 0; +} + +static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_peer_close *hdr = cplhdr(skb); + struct c4iw_ep *ep; + struct c4iw_qp_attributes attrs; + int disconnect = 1; + int release = 0; + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int tid = GET_TID(hdr); + int ret; + + ep = lookup_tid(t, tid); + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + dst_confirm(ep->dst); + + mutex_lock(&ep->com.mutex); + switch (ep->com.state) { + case MPA_REQ_WAIT: + __state_set(&ep->com, CLOSING); + break; + case MPA_REQ_SENT: + __state_set(&ep->com, CLOSING); + connect_reply_upcall(ep, -ECONNRESET); + break; + case MPA_REQ_RCVD: + + /* + * We're gonna mark this puppy DEAD, but keep + * the reference on it until the ULP accepts or + * rejects the CR. Also wake up anyone waiting + * in rdma connection migration (see c4iw_accept_cr()). + */ + __state_set(&ep->com, CLOSING); + PDBG("waking up ep %p tid %u\n", ep, ep->hwtid); + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + break; + case MPA_REP_SENT: + __state_set(&ep->com, CLOSING); + PDBG("waking up ep %p tid %u\n", ep, ep->hwtid); + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + break; + case FPDU_MODE: + start_ep_timer(ep); + __state_set(&ep->com, CLOSING); + attrs.next_state = C4IW_QP_STATE_CLOSING; + ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + if (ret != -ECONNRESET) { + peer_close_upcall(ep); + disconnect = 1; + } + break; + case ABORTING: + disconnect = 0; + break; + case CLOSING: + __state_set(&ep->com, MORIBUND); + disconnect = 0; + break; + case MORIBUND: + stop_ep_timer(ep); + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = C4IW_QP_STATE_IDLE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + } + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + disconnect = 0; + break; + case DEAD: + disconnect = 0; + break; + default: + BUG_ON(1); + } + mutex_unlock(&ep->com.mutex); + if (disconnect) + c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + if (release) + release_ep_resources(ep); + return 0; +} + +/* + * Returns whether an ABORT_REQ_RSS message is a negative advice. + */ +static int is_neg_adv_abort(unsigned int status) +{ + return status == CPL_ERR_RTX_NEG_ADVICE || + status == CPL_ERR_PERSIST_NEG_ADVICE; +} + +static int c4iw_reconnect(struct c4iw_ep *ep) +{ + struct rtable *rt; + int err = 0; + + PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); + init_timer(&ep->timer); + + /* + * Allocate an active TID to initiate a TCP connection. + */ + ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); + if (ep->atid == -1) { + printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); + err = -ENOMEM; + goto fail2; + } + + /* find a route */ + rt = find_route(ep->com.dev, + ep->com.cm_id->local_addr.sin_addr.s_addr, + ep->com.cm_id->remote_addr.sin_addr.s_addr, + ep->com.cm_id->local_addr.sin_port, + ep->com.cm_id->remote_addr.sin_port, 0); + if (!rt) { + printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); + err = -EHOSTUNREACH; + goto fail3; + } + ep->dst = &rt->dst; + + err = import_ep(ep, ep->com.cm_id->remote_addr.sin_addr.s_addr, + ep->dst, ep->com.dev, false); + if (err) { + printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); + goto fail4; + } + + PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", + __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, + ep->l2t->idx); + + state_set(&ep->com, CONNECTING); + ep->tos = 0; + + /* send connect request to rnic */ + err = send_connect(ep); + if (!err) + goto out; + + cxgb4_l2t_release(ep->l2t); +fail4: + dst_release(ep->dst); +fail3: + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); +fail2: + /* + * remember to send notification to upper layer. + * We are in here so the upper layer is not aware that this is + * re-connect attempt and so, upper layer is still waiting for + * response of 1st connect request. + */ + connect_reply_upcall(ep, -ECONNRESET); + c4iw_put_ep(&ep->com); +out: + return err; +} + +static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_abort_req_rss *req = cplhdr(skb); + struct c4iw_ep *ep; + struct cpl_abort_rpl *rpl; + struct sk_buff *rpl_skb; + struct c4iw_qp_attributes attrs; + int ret; + int release = 0; + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int tid = GET_TID(req); + + ep = lookup_tid(t, tid); + if (is_neg_adv_abort(req->status)) { + PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep, + ep->hwtid); + return 0; + } + PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, + ep->com.state); + + /* + * Wake up any threads in rdma_init() or rdma_fini(). + * However, this is not needed if com state is just + * MPA_REQ_SENT + */ + if (ep->com.state != MPA_REQ_SENT) + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + + mutex_lock(&ep->com.mutex); + switch (ep->com.state) { + case CONNECTING: + break; + case MPA_REQ_WAIT: + stop_ep_timer(ep); + break; + case MPA_REQ_SENT: + stop_ep_timer(ep); + if (mpa_rev == 2 && ep->tried_with_mpa_v1) + connect_reply_upcall(ep, -ECONNRESET); + else { + /* + * we just don't send notification upwards because we + * want to retry with mpa_v1 without upper layers even + * knowing it. + * + * do some housekeeping so as to re-initiate the + * connection + */ + PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__, + mpa_rev); + ep->retry_with_mpa_v1 = 1; + } + break; + case MPA_REP_SENT: + break; + case MPA_REQ_RCVD: + break; + case MORIBUND: + case CLOSING: + stop_ep_timer(ep); + /*FALLTHROUGH*/ + case FPDU_MODE: + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = C4IW_QP_STATE_ERROR; + ret = c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, + &attrs, 1); + if (ret) + printk(KERN_ERR MOD + "%s - qp <- error failed!\n", + __func__); + } + peer_abort_upcall(ep); + break; + case ABORTING: + break; + case DEAD: + PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); + mutex_unlock(&ep->com.mutex); + return 0; + default: + BUG_ON(1); + break; + } + dst_confirm(ep->dst); + if (ep->com.state != ABORTING) { + __state_set(&ep->com, DEAD); + /* we don't release if we want to retry with mpa_v1 */ + if (!ep->retry_with_mpa_v1) + release = 1; + } + mutex_unlock(&ep->com.mutex); + + rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); + if (!rpl_skb) { + printk(KERN_ERR MOD "%s - cannot allocate skb!\n", + __func__); + release = 1; + goto out; + } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); + INIT_TP_WR(rpl, ep->hwtid); + OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); + rpl->cmd = CPL_ABORT_NO_RST; + c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb); +out: + if (release) + release_ep_resources(ep); + + /* retry with mpa-v1 */ + if (ep && ep->retry_with_mpa_v1) { + cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_reconnect(ep); + } + + return 0; +} + +static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct c4iw_qp_attributes attrs; + struct cpl_close_con_rpl *rpl = cplhdr(skb); + int release = 0; + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int tid = GET_TID(rpl); + + ep = lookup_tid(t, tid); + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + BUG_ON(!ep); + + /* The cm_id may be null if we failed to connect */ + mutex_lock(&ep->com.mutex); + switch (ep->com.state) { + case CLOSING: + __state_set(&ep->com, MORIBUND); + break; + case MORIBUND: + stop_ep_timer(ep); + if ((ep->com.cm_id) && (ep->com.qp)) { + attrs.next_state = C4IW_QP_STATE_IDLE; + c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + break; + case ABORTING: + case DEAD: + break; + default: + BUG_ON(1); + break; + } + mutex_unlock(&ep->com.mutex); + if (release) + release_ep_resources(ep); + return 0; +} + +static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_rdma_terminate *rpl = cplhdr(skb); + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int tid = GET_TID(rpl); + struct c4iw_ep *ep; + struct c4iw_qp_attributes attrs; + + ep = lookup_tid(t, tid); + BUG_ON(!ep); + + if (ep && ep->com.qp) { + printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid, + ep->com.qp->wq.sq.qid); + attrs.next_state = C4IW_QP_STATE_TERMINATE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + } else + printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid); + + return 0; +} + +/* + * Upcall from the adapter indicating data has been transmitted. + * For us its just the single MPA request or reply. We can now free + * the skb holding the mpa message. + */ +static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_fw4_ack *hdr = cplhdr(skb); + u8 credits = hdr->credits; + unsigned int tid = GET_TID(hdr); + struct tid_info *t = dev->rdev.lldi.tids; + + + ep = lookup_tid(t, tid); + PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits); + if (credits == 0) { + PDBG("%s 0 credit ack ep %p tid %u state %u\n", + __func__, ep, ep->hwtid, state_read(&ep->com)); + return 0; + } + + dst_confirm(ep->dst); + if (ep->mpa_skb) { + PDBG("%s last streaming msg ack ep %p tid %u state %u " + "initiator %u freeing skb\n", __func__, ep, ep->hwtid, + state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0); + kfree_skb(ep->mpa_skb); + ep->mpa_skb = NULL; + } + return 0; +} + +int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ + int err; + struct c4iw_ep *ep = to_ep(cm_id); + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + + if (state_read(&ep->com) == DEAD) { + c4iw_put_ep(&ep->com); + return -ECONNRESET; + } + BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); + if (mpa_rev == 0) + abort_connection(ep, NULL, GFP_KERNEL); + else { + err = send_mpa_reject(ep, pdata, pdata_len); + err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + } + c4iw_put_ep(&ep->com); + return 0; +} + +int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + int err; + struct c4iw_qp_attributes attrs; + enum c4iw_qp_attr_mask mask; + struct c4iw_ep *ep = to_ep(cm_id); + struct c4iw_dev *h = to_c4iw_dev(cm_id->device); + struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + if (state_read(&ep->com) == DEAD) { + err = -ECONNRESET; + goto err; + } + + BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); + BUG_ON(!qp); + + if ((conn_param->ord > c4iw_max_read_depth) || + (conn_param->ird > c4iw_max_read_depth)) { + abort_connection(ep, NULL, GFP_KERNEL); + err = -EINVAL; + goto err; + } + + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { + if (conn_param->ord > ep->ird) { + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + send_mpa_reject(ep, conn_param->private_data, + conn_param->private_data_len); + abort_connection(ep, NULL, GFP_KERNEL); + err = -ENOMEM; + goto err; + } + if (conn_param->ird > ep->ord) { + if (!ep->ord) + conn_param->ird = 1; + else { + abort_connection(ep, NULL, GFP_KERNEL); + err = -ENOMEM; + goto err; + } + } + + } + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + + if (ep->mpa_attr.version != 2) + if (peer2peer && ep->ird == 0) + ep->ird = 1; + + PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); + + cm_id->add_ref(cm_id); + ep->com.cm_id = cm_id; + ep->com.qp = qp; + + /* bind QP to EP and move to RTS */ + attrs.mpa_attr = ep->mpa_attr; + attrs.max_ird = ep->ird; + attrs.max_ord = ep->ord; + attrs.llp_stream_handle = ep; + attrs.next_state = C4IW_QP_STATE_RTS; + + /* bind QP and TID with INIT_WR */ + mask = C4IW_QP_ATTR_NEXT_STATE | + C4IW_QP_ATTR_LLP_STREAM_HANDLE | + C4IW_QP_ATTR_MPA_ATTR | + C4IW_QP_ATTR_MAX_IRD | + C4IW_QP_ATTR_MAX_ORD; + + err = c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, mask, &attrs, 1); + if (err) + goto err1; + err = send_mpa_reply(ep, conn_param->private_data, + conn_param->private_data_len); + if (err) + goto err1; + + state_set(&ep->com, FPDU_MODE); + established_upcall(ep); + c4iw_put_ep(&ep->com); + return 0; +err1: + ep->com.cm_id = NULL; + ep->com.qp = NULL; + cm_id->rem_ref(cm_id); +err: + c4iw_put_ep(&ep->com); + return err; +} + +int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); + struct c4iw_ep *ep; + struct rtable *rt; + int err = 0; + + if ((conn_param->ord > c4iw_max_read_depth) || + (conn_param->ird > c4iw_max_read_depth)) { + err = -EINVAL; + goto out; + } + ep = alloc_ep(sizeof(*ep), GFP_KERNEL); + if (!ep) { + printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__); + err = -ENOMEM; + goto out; + } + init_timer(&ep->timer); + ep->plen = conn_param->private_data_len; + if (ep->plen) + memcpy(ep->mpa_pkt + sizeof(struct mpa_message), + conn_param->private_data, ep->plen); + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + + if (peer2peer && ep->ord == 0) + ep->ord = 1; + + cm_id->add_ref(cm_id); + ep->com.dev = dev; + ep->com.cm_id = cm_id; + ep->com.qp = get_qhp(dev, conn_param->qpn); + BUG_ON(!ep->com.qp); + PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, + ep->com.qp, cm_id); + + /* + * Allocate an active TID to initiate a TCP connection. + */ + ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep); + if (ep->atid == -1) { + printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); + err = -ENOMEM; + goto fail2; + } + + PDBG("%s saddr 0x%x sport 0x%x raddr 0x%x rport 0x%x\n", __func__, + ntohl(cm_id->local_addr.sin_addr.s_addr), + ntohs(cm_id->local_addr.sin_port), + ntohl(cm_id->remote_addr.sin_addr.s_addr), + ntohs(cm_id->remote_addr.sin_port)); + + /* find a route */ + rt = find_route(dev, + cm_id->local_addr.sin_addr.s_addr, + cm_id->remote_addr.sin_addr.s_addr, + cm_id->local_addr.sin_port, + cm_id->remote_addr.sin_port, 0); + if (!rt) { + printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); + err = -EHOSTUNREACH; + goto fail3; + } + ep->dst = &rt->dst; + + err = import_ep(ep, cm_id->remote_addr.sin_addr.s_addr, + ep->dst, ep->com.dev, true); + if (err) { + printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); + goto fail4; + } + + PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", + __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, + ep->l2t->idx); + + state_set(&ep->com, CONNECTING); + ep->tos = 0; + ep->com.local_addr = cm_id->local_addr; + ep->com.remote_addr = cm_id->remote_addr; + + /* send connect request to rnic */ + err = send_connect(ep); + if (!err) + goto out; + + cxgb4_l2t_release(ep->l2t); +fail4: + dst_release(ep->dst); +fail3: + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); +fail2: + cm_id->rem_ref(cm_id); + c4iw_put_ep(&ep->com); +out: + return err; +} + +int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) +{ + int err = 0; + struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); + struct c4iw_listen_ep *ep; + + + might_sleep(); + + ep = alloc_ep(sizeof(*ep), GFP_KERNEL); + if (!ep) { + printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__); + err = -ENOMEM; + goto fail1; + } + PDBG("%s ep %p\n", __func__, ep); + cm_id->add_ref(cm_id); + ep->com.cm_id = cm_id; + ep->com.dev = dev; + ep->backlog = backlog; + ep->com.local_addr = cm_id->local_addr; + + /* + * Allocate a server TID. + */ + ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep); + if (ep->stid == -1) { + printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__); + err = -ENOMEM; + goto fail2; + } + + state_set(&ep->com, LISTEN); + c4iw_init_wr_wait(&ep->com.wr_wait); + err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.local_addr.sin_addr.s_addr, + ep->com.local_addr.sin_port, + ep->com.dev->rdev.lldi.rxq_ids[0]); + if (err) + goto fail3; + + /* wait for pass_open_rpl */ + err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, + __func__); + if (!err) { + cm_id->provider_data = ep; + goto out; + } +fail3: + cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); +fail2: + cm_id->rem_ref(cm_id); + c4iw_put_ep(&ep->com); +fail1: +out: + return err; +} + +int c4iw_destroy_listen(struct iw_cm_id *cm_id) +{ + int err; + struct c4iw_listen_ep *ep = to_listen_ep(cm_id); + + PDBG("%s ep %p\n", __func__, ep); + + might_sleep(); + state_set(&ep->com, DEAD); + c4iw_init_wr_wait(&ep->com.wr_wait); + err = listen_stop(ep); + if (err) + goto done; + err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, + __func__); + cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); +done: + cm_id->rem_ref(cm_id); + c4iw_put_ep(&ep->com); + return err; +} + +int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) +{ + int ret = 0; + int close = 0; + int fatal = 0; + struct c4iw_rdev *rdev; + + mutex_lock(&ep->com.mutex); + + PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, + states[ep->com.state], abrupt); + + rdev = &ep->com.dev->rdev; + if (c4iw_fatal_error(rdev)) { + fatal = 1; + close_complete_upcall(ep); + ep->com.state = DEAD; + } + switch (ep->com.state) { + case MPA_REQ_WAIT: + case MPA_REQ_SENT: + case MPA_REQ_RCVD: + case MPA_REP_SENT: + case FPDU_MODE: + close = 1; + if (abrupt) + ep->com.state = ABORTING; + else { + ep->com.state = CLOSING; + start_ep_timer(ep); + } + set_bit(CLOSE_SENT, &ep->com.flags); + break; + case CLOSING: + if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { + close = 1; + if (abrupt) { + stop_ep_timer(ep); + ep->com.state = ABORTING; + } else + ep->com.state = MORIBUND; + } + break; + case MORIBUND: + case ABORTING: + case DEAD: + PDBG("%s ignoring disconnect ep %p state %u\n", + __func__, ep, ep->com.state); + break; + default: + BUG(); + break; + } + + if (close) { + if (abrupt) { + close_complete_upcall(ep); + ret = send_abort(ep, NULL, gfp); + } else + ret = send_halfclose(ep, gfp); + if (ret) + fatal = 1; + } + mutex_unlock(&ep->com.mutex); + if (fatal) + release_ep_resources(ep); + return ret; +} + +static int async_event(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_fw6_msg *rpl = cplhdr(skb); + c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); + return 0; +} + +/* + * These are the real handlers that are called from a + * work queue. + */ +static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = { + [CPL_ACT_ESTABLISH] = act_establish, + [CPL_ACT_OPEN_RPL] = act_open_rpl, + [CPL_RX_DATA] = rx_data, + [CPL_ABORT_RPL_RSS] = abort_rpl, + [CPL_ABORT_RPL] = abort_rpl, + [CPL_PASS_OPEN_RPL] = pass_open_rpl, + [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl, + [CPL_PASS_ACCEPT_REQ] = pass_accept_req, + [CPL_PASS_ESTABLISH] = pass_establish, + [CPL_PEER_CLOSE] = peer_close, + [CPL_ABORT_REQ_RSS] = peer_abort, + [CPL_CLOSE_CON_RPL] = close_con_rpl, + [CPL_RDMA_TERMINATE] = terminate, + [CPL_FW4_ACK] = fw4_ack, + [CPL_FW6_MSG] = async_event +}; + +static void process_timeout(struct c4iw_ep *ep) +{ + struct c4iw_qp_attributes attrs; + int abort = 1; + + mutex_lock(&ep->com.mutex); + PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, + ep->com.state); + switch (ep->com.state) { + case MPA_REQ_SENT: + __state_set(&ep->com, ABORTING); + connect_reply_upcall(ep, -ETIMEDOUT); + break; + case MPA_REQ_WAIT: + __state_set(&ep->com, ABORTING); + break; + case CLOSING: + case MORIBUND: + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = C4IW_QP_STATE_ERROR; + c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + __state_set(&ep->com, ABORTING); + break; + default: + printk(KERN_ERR "%s unexpected state ep %p tid %u state %u\n", + __func__, ep, ep->hwtid, ep->com.state); + WARN_ON(1); + abort = 0; + } + mutex_unlock(&ep->com.mutex); + if (abort) + abort_connection(ep, NULL, GFP_KERNEL); + c4iw_put_ep(&ep->com); +} + +static void process_timedout_eps(void) +{ + struct c4iw_ep *ep; + + spin_lock_irq(&timeout_lock); + while (!list_empty(&timeout_list)) { + struct list_head *tmp; + + tmp = timeout_list.next; + list_del(tmp); + spin_unlock_irq(&timeout_lock); + ep = list_entry(tmp, struct c4iw_ep, entry); + process_timeout(ep); + spin_lock_irq(&timeout_lock); + } + spin_unlock_irq(&timeout_lock); +} + +static void process_work(struct work_struct *work) +{ + struct sk_buff *skb = NULL; + struct c4iw_dev *dev; + struct cpl_act_establish *rpl; + unsigned int opcode; + int ret; + + while ((skb = skb_dequeue(&rxq))) { + rpl = cplhdr(skb); + dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); + opcode = rpl->ot.opcode; + + BUG_ON(!work_handlers[opcode]); + ret = work_handlers[opcode](dev, skb); + if (!ret) + kfree_skb(skb); + } + process_timedout_eps(); +} + +static DECLARE_WORK(skb_work, process_work); + +static void ep_timeout(unsigned long arg) +{ + struct c4iw_ep *ep = (struct c4iw_ep *)arg; + + spin_lock(&timeout_lock); + list_add_tail(&ep->entry, &timeout_list); + spin_unlock(&timeout_lock); + queue_work(workq, &skb_work); +} + +/* + * All the CM events are handled on a work queue to have a safe context. + */ +static int sched(struct c4iw_dev *dev, struct sk_buff *skb) +{ + + /* + * Save dev in the skb->cb area. + */ + *((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev; + + /* + * Queue the skb and schedule the worker thread. + */ + skb_queue_tail(&rxq, skb); + queue_work(workq, &skb_work); + return 0; +} + +static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_set_tcb_rpl *rpl = cplhdr(skb); + + if (rpl->status != CPL_ERR_NONE) { + printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u " + "for tid %u\n", rpl->status, GET_TID(rpl)); + } + kfree_skb(skb); + return 0; +} + +static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_fw6_msg *rpl = cplhdr(skb); + struct c4iw_wr_wait *wr_waitp; + int ret; + + PDBG("%s type %u\n", __func__, rpl->type); + + switch (rpl->type) { + case 1: + ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff); + wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; + PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret); + if (wr_waitp) + c4iw_wake_up(wr_waitp, ret ? -ret : 0); + kfree_skb(skb); + break; + case 2: + sched(dev, skb); + break; + default: + printk(KERN_ERR MOD "%s unexpected fw6 msg type %u\n", __func__, + rpl->type); + kfree_skb(skb); + break; + } + return 0; +} + +static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_abort_req_rss *req = cplhdr(skb); + struct c4iw_ep *ep; + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int tid = GET_TID(req); + + ep = lookup_tid(t, tid); + if (!ep) { + printk(KERN_WARNING MOD + "Abort on non-existent endpoint, tid %d\n", tid); + kfree_skb(skb); + return 0; + } + if (is_neg_adv_abort(req->status)) { + PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep, + ep->hwtid); + kfree_skb(skb); + return 0; + } + PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, + ep->com.state); + + /* + * Wake up any threads in rdma_init() or rdma_fini(). + */ + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + sched(dev, skb); + return 0; +} + +/* + * Most upcalls from the T4 Core go to sched() to + * schedule the processing on a work queue. + */ +c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = { + [CPL_ACT_ESTABLISH] = sched, + [CPL_ACT_OPEN_RPL] = sched, + [CPL_RX_DATA] = sched, + [CPL_ABORT_RPL_RSS] = sched, + [CPL_ABORT_RPL] = sched, + [CPL_PASS_OPEN_RPL] = sched, + [CPL_CLOSE_LISTSRV_RPL] = sched, + [CPL_PASS_ACCEPT_REQ] = sched, + [CPL_PASS_ESTABLISH] = sched, + [CPL_PEER_CLOSE] = sched, + [CPL_CLOSE_CON_RPL] = sched, + [CPL_ABORT_REQ_RSS] = peer_abort_intr, + [CPL_RDMA_TERMINATE] = sched, + [CPL_FW4_ACK] = sched, + [CPL_SET_TCB_RPL] = set_tcb_rpl, + [CPL_FW6_MSG] = fw6_msg +}; + +int __init c4iw_cm_init(void) +{ + spin_lock_init(&timeout_lock); + skb_queue_head_init(&rxq); + + workq = create_singlethread_workqueue("iw_cxgb4"); + if (!workq) + return -ENOMEM; + + return 0; +} + +void __exit c4iw_cm_term(void) +{ + WARN_ON(!list_empty(&timeout_list)); + flush_workqueue(workq); + destroy_workqueue(workq); +} diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c new file mode 100644 index 00000000..0f1607c8 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -0,0 +1,898 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "iw_cxgb4.h" + +static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, + struct c4iw_dev_ucontext *uctx) +{ + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + int wr_len; + struct c4iw_wr_wait wr_wait; + struct sk_buff *skb; + int ret; + + wr_len = sizeof *res_wr + sizeof *res; + skb = alloc_skb(wr_len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, wr_len); + res_wr->op_nres = cpu_to_be32( + FW_WR_OP(FW_RI_RES_WR) | + V_FW_RI_RES_WR_NRES(1) | + FW_WR_COMPL(1)); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (unsigned long) &wr_wait; + res = res_wr->res; + res->u.cq.restype = FW_RI_RES_TYPE_CQ; + res->u.cq.op = FW_RI_RES_OP_RESET; + res->u.cq.iqid = cpu_to_be32(cq->cqid); + + c4iw_init_wr_wait(&wr_wait); + ret = c4iw_ofld_send(rdev, skb); + if (!ret) { + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + } + + kfree(cq->sw_queue); + dma_free_coherent(&(rdev->lldi.pdev->dev), + cq->memsize, cq->queue, + dma_unmap_addr(cq, mapping)); + c4iw_put_cqid(rdev, cq->cqid, uctx); + return ret; +} + +static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, + struct c4iw_dev_ucontext *uctx) +{ + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + int wr_len; + int user = (uctx != &rdev->uctx); + struct c4iw_wr_wait wr_wait; + int ret; + struct sk_buff *skb; + + cq->cqid = c4iw_get_cqid(rdev, uctx); + if (!cq->cqid) { + ret = -ENOMEM; + goto err1; + } + + if (!user) { + cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL); + if (!cq->sw_queue) { + ret = -ENOMEM; + goto err2; + } + } + cq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, cq->memsize, + &cq->dma_addr, GFP_KERNEL); + if (!cq->queue) { + ret = -ENOMEM; + goto err3; + } + dma_unmap_addr_set(cq, mapping, cq->dma_addr); + memset(cq->queue, 0, cq->memsize); + + /* build fw_ri_res_wr */ + wr_len = sizeof *res_wr + sizeof *res; + + skb = alloc_skb(wr_len, GFP_KERNEL); + if (!skb) { + ret = -ENOMEM; + goto err4; + } + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, wr_len); + res_wr->op_nres = cpu_to_be32( + FW_WR_OP(FW_RI_RES_WR) | + V_FW_RI_RES_WR_NRES(1) | + FW_WR_COMPL(1)); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (unsigned long) &wr_wait; + res = res_wr->res; + res->u.cq.restype = FW_RI_RES_TYPE_CQ; + res->u.cq.op = FW_RI_RES_OP_WRITE; + res->u.cq.iqid = cpu_to_be32(cq->cqid); + res->u.cq.iqandst_to_iqandstindex = cpu_to_be32( + V_FW_RI_RES_WR_IQANUS(0) | + V_FW_RI_RES_WR_IQANUD(1) | + F_FW_RI_RES_WR_IQANDST | + V_FW_RI_RES_WR_IQANDSTINDEX(*rdev->lldi.rxq_ids)); + res->u.cq.iqdroprss_to_iqesize = cpu_to_be16( + F_FW_RI_RES_WR_IQDROPRSS | + V_FW_RI_RES_WR_IQPCIECH(2) | + V_FW_RI_RES_WR_IQINTCNTTHRESH(0) | + F_FW_RI_RES_WR_IQO | + V_FW_RI_RES_WR_IQESIZE(1)); + res->u.cq.iqsize = cpu_to_be16(cq->size); + res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr); + + c4iw_init_wr_wait(&wr_wait); + + ret = c4iw_ofld_send(rdev, skb); + if (ret) + goto err4; + PDBG("%s wait_event wr_wait %p\n", __func__, &wr_wait); + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + if (ret) + goto err4; + + cq->gen = 1; + cq->gts = rdev->lldi.gts_reg; + cq->rdev = rdev; + if (user) { + cq->ugts = (u64)pci_resource_start(rdev->lldi.pdev, 2) + + (cq->cqid << rdev->cqshift); + cq->ugts &= PAGE_MASK; + } + return 0; +err4: + dma_free_coherent(&rdev->lldi.pdev->dev, cq->memsize, cq->queue, + dma_unmap_addr(cq, mapping)); +err3: + kfree(cq->sw_queue); +err2: + c4iw_put_cqid(rdev, cq->cqid, uctx); +err1: + return ret; +} + +static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) +{ + struct t4_cqe cqe; + + PDBG("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__, + wq, cq, cq->sw_cidx, cq->sw_pidx); + memset(&cqe, 0, sizeof(cqe)); + cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) | + V_CQE_OPCODE(FW_RI_SEND) | + V_CQE_TYPE(0) | + V_CQE_SWCQE(1) | + V_CQE_QPID(wq->sq.qid)); + cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen)); + cq->sw_queue[cq->sw_pidx] = cqe; + t4_swcq_produce(cq); +} + +int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count) +{ + int flushed = 0; + int in_use = wq->rq.in_use - count; + + BUG_ON(in_use < 0); + PDBG("%s wq %p cq %p rq.in_use %u skip count %u\n", __func__, + wq, cq, wq->rq.in_use, count); + while (in_use--) { + insert_recv_cqe(wq, cq); + flushed++; + } + return flushed; +} + +static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq, + struct t4_swsqe *swcqe) +{ + struct t4_cqe cqe; + + PDBG("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__, + wq, cq, cq->sw_cidx, cq->sw_pidx); + memset(&cqe, 0, sizeof(cqe)); + cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) | + V_CQE_OPCODE(swcqe->opcode) | + V_CQE_TYPE(1) | + V_CQE_SWCQE(1) | + V_CQE_QPID(wq->sq.qid)); + CQE_WRID_SQ_IDX(&cqe) = swcqe->idx; + cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen)); + cq->sw_queue[cq->sw_pidx] = cqe; + t4_swcq_produce(cq); +} + +int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count) +{ + int flushed = 0; + struct t4_swsqe *swsqe = &wq->sq.sw_sq[wq->sq.cidx + count]; + int in_use = wq->sq.in_use - count; + + BUG_ON(in_use < 0); + while (in_use--) { + swsqe->signaled = 0; + insert_sq_cqe(wq, cq, swsqe); + swsqe++; + if (swsqe == (wq->sq.sw_sq + wq->sq.size)) + swsqe = wq->sq.sw_sq; + flushed++; + } + return flushed; +} + +/* + * Move all CQEs from the HWCQ into the SWCQ. + */ +void c4iw_flush_hw_cq(struct t4_cq *cq) +{ + struct t4_cqe *cqe = NULL, *swcqe; + int ret; + + PDBG("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid); + ret = t4_next_hw_cqe(cq, &cqe); + while (!ret) { + PDBG("%s flushing hwcq cidx 0x%x swcq pidx 0x%x\n", + __func__, cq->cidx, cq->sw_pidx); + swcqe = &cq->sw_queue[cq->sw_pidx]; + *swcqe = *cqe; + swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); + t4_swcq_produce(cq); + t4_hwcq_consume(cq); + ret = t4_next_hw_cqe(cq, &cqe); + } +} + +static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq) +{ + if (CQE_OPCODE(cqe) == FW_RI_TERMINATE) + return 0; + + if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe)) + return 0; + + if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe)) + return 0; + + if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq)) + return 0; + return 1; +} + +void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count) +{ + struct t4_cqe *cqe; + u32 ptr; + + *count = 0; + ptr = cq->sw_cidx; + while (ptr != cq->sw_pidx) { + cqe = &cq->sw_queue[ptr]; + if ((SQ_TYPE(cqe) || ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && + wq->sq.oldest_read)) && + (CQE_QPID(cqe) == wq->sq.qid)) + (*count)++; + if (++ptr == cq->size) + ptr = 0; + } + PDBG("%s cq %p count %d\n", __func__, cq, *count); +} + +void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) +{ + struct t4_cqe *cqe; + u32 ptr; + + *count = 0; + PDBG("%s count zero %d\n", __func__, *count); + ptr = cq->sw_cidx; + while (ptr != cq->sw_pidx) { + cqe = &cq->sw_queue[ptr]; + if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) && + (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq)) + (*count)++; + if (++ptr == cq->size) + ptr = 0; + } + PDBG("%s cq %p count %d\n", __func__, cq, *count); +} + +static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq) +{ + struct t4_swsqe *swsqe; + u16 ptr = wq->sq.cidx; + int count = wq->sq.in_use; + int unsignaled = 0; + + swsqe = &wq->sq.sw_sq[ptr]; + while (count--) + if (!swsqe->signaled) { + if (++ptr == wq->sq.size) + ptr = 0; + swsqe = &wq->sq.sw_sq[ptr]; + unsignaled++; + } else if (swsqe->complete) { + + /* + * Insert this completed cqe into the swcq. + */ + PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n", + __func__, ptr, cq->sw_pidx); + swsqe->cqe.header |= htonl(V_CQE_SWCQE(1)); + cq->sw_queue[cq->sw_pidx] = swsqe->cqe; + t4_swcq_produce(cq); + swsqe->signaled = 0; + wq->sq.in_use -= unsignaled; + break; + } else + break; +} + +static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe, + struct t4_cqe *read_cqe) +{ + read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx; + read_cqe->len = cpu_to_be32(wq->sq.oldest_read->read_len); + read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) | + V_CQE_SWCQE(SW_CQE(hw_cqe)) | + V_CQE_OPCODE(FW_RI_READ_REQ) | + V_CQE_TYPE(1)); + read_cqe->bits_type_ts = hw_cqe->bits_type_ts; +} + +/* + * Return a ptr to the next read wr in the SWSQ or NULL. + */ +static void advance_oldest_read(struct t4_wq *wq) +{ + + u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1; + + if (rptr == wq->sq.size) + rptr = 0; + while (rptr != wq->sq.pidx) { + wq->sq.oldest_read = &wq->sq.sw_sq[rptr]; + + if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ) + return; + if (++rptr == wq->sq.size) + rptr = 0; + } + wq->sq.oldest_read = NULL; +} + +/* + * poll_cq + * + * Caller must: + * check the validity of the first CQE, + * supply the wq assicated with the qpid. + * + * credit: cq credit to return to sge. + * cqe_flushed: 1 iff the CQE is flushed. + * cqe: copy of the polled CQE. + * + * return value: + * 0 CQE returned ok. + * -EAGAIN CQE skipped, try again. + * -EOVERFLOW CQ overflow detected. + */ +static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, + u8 *cqe_flushed, u64 *cookie, u32 *credit) +{ + int ret = 0; + struct t4_cqe *hw_cqe, read_cqe; + + *cqe_flushed = 0; + *credit = 0; + ret = t4_next_cqe(cq, &hw_cqe); + if (ret) + return ret; + + PDBG("%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x" + " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", + __func__, CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe), + CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe), + CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe), + CQE_WRID_LOW(hw_cqe)); + + /* + * skip cqe's not affiliated with a QP. + */ + if (wq == NULL) { + ret = -EAGAIN; + goto skip_cqe; + } + + /* + * Gotta tweak READ completions: + * 1) the cqe doesn't contain the sq_wptr from the wr. + * 2) opcode not reflected from the wr. + * 3) read_len not reflected from the wr. + * 4) cq_type is RQ_TYPE not SQ_TYPE. + */ + if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) { + + /* + * If this is an unsolicited read response, then the read + * was generated by the kernel driver as part of peer-2-peer + * connection setup. So ignore the completion. + */ + if (!wq->sq.oldest_read) { + if (CQE_STATUS(hw_cqe)) + t4_set_wq_in_error(wq); + ret = -EAGAIN; + goto skip_cqe; + } + + /* + * Don't write to the HWCQ, so create a new read req CQE + * in local memory. + */ + create_read_req_cqe(wq, hw_cqe, &read_cqe); + hw_cqe = &read_cqe; + advance_oldest_read(wq); + } + + if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) { + *cqe_flushed = t4_wq_in_error(wq); + t4_set_wq_in_error(wq); + goto proc_cqe; + } + + if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) { + ret = -EAGAIN; + goto skip_cqe; + } + + /* + * RECV completion. + */ + if (RQ_TYPE(hw_cqe)) { + + /* + * HW only validates 4 bits of MSN. So we must validate that + * the MSN in the SEND is the next expected MSN. If its not, + * then we complete this with T4_ERR_MSN and mark the wq in + * error. + */ + + if (t4_rq_empty(wq)) { + t4_set_wq_in_error(wq); + ret = -EAGAIN; + goto skip_cqe; + } + if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) { + t4_set_wq_in_error(wq); + hw_cqe->header |= htonl(V_CQE_STATUS(T4_ERR_MSN)); + goto proc_cqe; + } + goto proc_cqe; + } + + /* + * If we get here its a send completion. + * + * Handle out of order completion. These get stuffed + * in the SW SQ. Then the SW SQ is walked to move any + * now in-order completions into the SW CQ. This handles + * 2 cases: + * 1) reaping unsignaled WRs when the first subsequent + * signaled WR is completed. + * 2) out of order read completions. + */ + if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) { + struct t4_swsqe *swsqe; + + PDBG("%s out of order completion going in sw_sq at idx %u\n", + __func__, CQE_WRID_SQ_IDX(hw_cqe)); + swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)]; + swsqe->cqe = *hw_cqe; + swsqe->complete = 1; + ret = -EAGAIN; + goto flush_wq; + } + +proc_cqe: + *cqe = *hw_cqe; + + /* + * Reap the associated WR(s) that are freed up with this + * completion. + */ + if (SQ_TYPE(hw_cqe)) { + wq->sq.cidx = CQE_WRID_SQ_IDX(hw_cqe); + PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx); + *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id; + t4_sq_consume(wq); + } else { + PDBG("%s completing rq idx %u\n", __func__, wq->rq.cidx); + *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; + BUG_ON(t4_rq_empty(wq)); + t4_rq_consume(wq); + } + +flush_wq: + /* + * Flush any completed cqes that are now in-order. + */ + flush_completed_wrs(wq, cq); + +skip_cqe: + if (SW_CQE(hw_cqe)) { + PDBG("%s cq %p cqid 0x%x skip sw cqe cidx %u\n", + __func__, cq, cq->cqid, cq->sw_cidx); + t4_swcq_consume(cq); + } else { + PDBG("%s cq %p cqid 0x%x skip hw cqe cidx %u\n", + __func__, cq, cq->cqid, cq->cidx); + t4_hwcq_consume(cq); + } + return ret; +} + +/* + * Get one cq entry from c4iw and map it to openib. + * + * Returns: + * 0 cqe returned + * -ENODATA EMPTY; + * -EAGAIN caller must try again + * any other -errno fatal error + */ +static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) +{ + struct c4iw_qp *qhp = NULL; + struct t4_cqe cqe = {0, 0}, *rd_cqe; + struct t4_wq *wq; + u32 credit = 0; + u8 cqe_flushed; + u64 cookie = 0; + int ret; + + ret = t4_next_cqe(&chp->cq, &rd_cqe); + + if (ret) + return ret; + + qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe)); + if (!qhp) + wq = NULL; + else { + spin_lock(&qhp->lock); + wq = &(qhp->wq); + } + ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); + if (ret) + goto out; + + wc->wr_id = cookie; + wc->qp = &qhp->ibqp; + wc->vendor_err = CQE_STATUS(&cqe); + wc->wc_flags = 0; + + PDBG("%s qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x " + "lo 0x%x cookie 0x%llx\n", __func__, CQE_QPID(&cqe), + CQE_TYPE(&cqe), CQE_OPCODE(&cqe), CQE_STATUS(&cqe), CQE_LEN(&cqe), + CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe), (unsigned long long)cookie); + + if (CQE_TYPE(&cqe) == 0) { + if (!CQE_STATUS(&cqe)) + wc->byte_len = CQE_LEN(&cqe); + else + wc->byte_len = 0; + wc->opcode = IB_WC_RECV; + if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV || + CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { + wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + } + } else { + switch (CQE_OPCODE(&cqe)) { + case FW_RI_RDMA_WRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case FW_RI_READ_REQ: + wc->opcode = IB_WC_RDMA_READ; + wc->byte_len = CQE_LEN(&cqe); + break; + case FW_RI_SEND_WITH_INV: + case FW_RI_SEND_WITH_SE_INV: + wc->opcode = IB_WC_SEND; + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + break; + case FW_RI_SEND: + case FW_RI_SEND_WITH_SE: + wc->opcode = IB_WC_SEND; + break; + case FW_RI_BIND_MW: + wc->opcode = IB_WC_BIND_MW; + break; + + case FW_RI_LOCAL_INV: + wc->opcode = IB_WC_LOCAL_INV; + break; + case FW_RI_FAST_REGISTER: + wc->opcode = IB_WC_FAST_REG_MR; + break; + default: + printk(KERN_ERR MOD "Unexpected opcode %d " + "in the CQE received for QPID=0x%0x\n", + CQE_OPCODE(&cqe), CQE_QPID(&cqe)); + ret = -EINVAL; + goto out; + } + } + + if (cqe_flushed) + wc->status = IB_WC_WR_FLUSH_ERR; + else { + + switch (CQE_STATUS(&cqe)) { + case T4_ERR_SUCCESS: + wc->status = IB_WC_SUCCESS; + break; + case T4_ERR_STAG: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case T4_ERR_PDID: + wc->status = IB_WC_LOC_PROT_ERR; + break; + case T4_ERR_QPID: + case T4_ERR_ACCESS: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case T4_ERR_WRAP: + wc->status = IB_WC_GENERAL_ERR; + break; + case T4_ERR_BOUND: + wc->status = IB_WC_LOC_LEN_ERR; + break; + case T4_ERR_INVALIDATE_SHARED_MR: + case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND: + wc->status = IB_WC_MW_BIND_ERR; + break; + case T4_ERR_CRC: + case T4_ERR_MARKER: + case T4_ERR_PDU_LEN_ERR: + case T4_ERR_OUT_OF_RQE: + case T4_ERR_DDP_VERSION: + case T4_ERR_RDMA_VERSION: + case T4_ERR_DDP_QUEUE_NUM: + case T4_ERR_MSN: + case T4_ERR_TBIT: + case T4_ERR_MO: + case T4_ERR_MSN_RANGE: + case T4_ERR_IRD_OVERFLOW: + case T4_ERR_OPCODE: + case T4_ERR_INTERNAL_ERR: + wc->status = IB_WC_FATAL_ERR; + break; + case T4_ERR_SWFLUSH: + wc->status = IB_WC_WR_FLUSH_ERR; + break; + default: + printk(KERN_ERR MOD + "Unexpected cqe_status 0x%x for QPID=0x%0x\n", + CQE_STATUS(&cqe), CQE_QPID(&cqe)); + ret = -EINVAL; + } + } +out: + if (wq) + spin_unlock(&qhp->lock); + return ret; +} + +int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct c4iw_cq *chp; + unsigned long flags; + int npolled; + int err = 0; + + chp = to_c4iw_cq(ibcq); + + spin_lock_irqsave(&chp->lock, flags); + for (npolled = 0; npolled < num_entries; ++npolled) { + do { + err = c4iw_poll_cq_one(chp, wc + npolled); + } while (err == -EAGAIN); + if (err) + break; + } + spin_unlock_irqrestore(&chp->lock, flags); + return !err || err == -ENODATA ? npolled : err; +} + +int c4iw_destroy_cq(struct ib_cq *ib_cq) +{ + struct c4iw_cq *chp; + struct c4iw_ucontext *ucontext; + + PDBG("%s ib_cq %p\n", __func__, ib_cq); + chp = to_c4iw_cq(ib_cq); + + remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid); + atomic_dec(&chp->refcnt); + wait_event(chp->wait, !atomic_read(&chp->refcnt)); + + ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context) + : NULL; + destroy_cq(&chp->rhp->rdev, &chp->cq, + ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx); + kfree(chp); + return 0; +} + +struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, + int vector, struct ib_ucontext *ib_context, + struct ib_udata *udata) +{ + struct c4iw_dev *rhp; + struct c4iw_cq *chp; + struct c4iw_create_cq_resp uresp; + struct c4iw_ucontext *ucontext = NULL; + int ret; + size_t memsize, hwentries; + struct c4iw_mm_entry *mm, *mm2; + + PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries); + + rhp = to_c4iw_dev(ibdev); + + chp = kzalloc(sizeof(*chp), GFP_KERNEL); + if (!chp) + return ERR_PTR(-ENOMEM); + + if (ib_context) + ucontext = to_c4iw_ucontext(ib_context); + + /* account for the status page. */ + entries++; + + /* IQ needs one extra entry to differentiate full vs empty. */ + entries++; + + /* + * entries must be multiple of 16 for HW. + */ + entries = roundup(entries, 16); + + /* + * Make actual HW queue 2x to avoid cdix_inc overflows. + */ + hwentries = entries * 2; + + /* + * Make HW queue at least 64 entries so GTS updates aren't too + * frequent. + */ + if (hwentries < 64) + hwentries = 64; + + memsize = hwentries * sizeof *chp->cq.queue; + + /* + * memsize must be a multiple of the page size if its a user cq. + */ + if (ucontext) { + memsize = roundup(memsize, PAGE_SIZE); + hwentries = memsize / sizeof *chp->cq.queue; + while (hwentries > T4_MAX_IQ_SIZE) { + memsize -= PAGE_SIZE; + hwentries = memsize / sizeof *chp->cq.queue; + } + } + chp->cq.size = hwentries; + chp->cq.memsize = memsize; + + ret = create_cq(&rhp->rdev, &chp->cq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + if (ret) + goto err1; + + chp->rhp = rhp; + chp->cq.size--; /* status page */ + chp->ibcq.cqe = entries - 2; + spin_lock_init(&chp->lock); + spin_lock_init(&chp->comp_handler_lock); + atomic_set(&chp->refcnt, 1); + init_waitqueue_head(&chp->wait); + ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); + if (ret) + goto err2; + + if (ucontext) { + mm = kmalloc(sizeof *mm, GFP_KERNEL); + if (!mm) + goto err3; + mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); + if (!mm2) + goto err4; + + uresp.qid_mask = rhp->rdev.cqmask; + uresp.cqid = chp->cq.cqid; + uresp.size = chp->cq.size; + uresp.memsize = chp->cq.memsize; + spin_lock(&ucontext->mmap_lock); + uresp.key = ucontext->key; + ucontext->key += PAGE_SIZE; + uresp.gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + spin_unlock(&ucontext->mmap_lock); + ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); + if (ret) + goto err5; + + mm->key = uresp.key; + mm->addr = virt_to_phys(chp->cq.queue); + mm->len = chp->cq.memsize; + insert_mmap(ucontext, mm); + + mm2->key = uresp.gts_key; + mm2->addr = chp->cq.ugts; + mm2->len = PAGE_SIZE; + insert_mmap(ucontext, mm2); + } + PDBG("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n", + __func__, chp->cq.cqid, chp, chp->cq.size, + chp->cq.memsize, + (unsigned long long) chp->cq.dma_addr); + return &chp->ibcq; +err5: + kfree(mm2); +err4: + kfree(mm); +err3: + remove_handle(rhp, &rhp->cqidr, chp->cq.cqid); +err2: + destroy_cq(&chp->rhp->rdev, &chp->cq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx); +err1: + kfree(chp); + return ERR_PTR(ret); +} + +int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) +{ + return -ENOSYS; +} + +int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct c4iw_cq *chp; + int ret; + unsigned long flag; + + chp = to_c4iw_cq(ibcq); + spin_lock_irqsave(&chp->lock, flag); + ret = t4_arm_cq(&chp->cq, + (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + spin_unlock_irqrestore(&chp->lock, flag); + if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) + ret = 0; + return ret; +} diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c new file mode 100644 index 00000000..6d0df6ec --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -0,0 +1,630 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include + +#include + +#include "iw_cxgb4.h" + +#define DRV_VERSION "0.1" + +MODULE_AUTHOR("Steve Wise"); +MODULE_DESCRIPTION("Chelsio T4 RDMA Driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); + +static LIST_HEAD(uld_ctx_list); +static DEFINE_MUTEX(dev_mutex); + +static struct dentry *c4iw_debugfs_root; + +struct c4iw_debugfs_data { + struct c4iw_dev *devp; + char *buf; + int bufsize; + int pos; +}; + +static int count_idrs(int id, void *p, void *data) +{ + int *countp = data; + + *countp = *countp + 1; + return 0; +} + +static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct c4iw_debugfs_data *d = file->private_data; + + return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos); +} + +static int dump_qp(int id, void *p, void *data) +{ + struct c4iw_qp *qp = p; + struct c4iw_debugfs_data *qpd = data; + int space; + int cc; + + if (id != qp->wq.sq.qid) + return 0; + + space = qpd->bufsize - qpd->pos - 1; + if (space == 0) + return 1; + + if (qp->ep) + cc = snprintf(qpd->buf + qpd->pos, space, + "qp sq id %u rq id %u state %u onchip %u " + "ep tid %u state %u %pI4:%u->%pI4:%u\n", + qp->wq.sq.qid, qp->wq.rq.qid, (int)qp->attr.state, + qp->wq.sq.flags & T4_SQ_ONCHIP, + qp->ep->hwtid, (int)qp->ep->com.state, + &qp->ep->com.local_addr.sin_addr.s_addr, + ntohs(qp->ep->com.local_addr.sin_port), + &qp->ep->com.remote_addr.sin_addr.s_addr, + ntohs(qp->ep->com.remote_addr.sin_port)); + else + cc = snprintf(qpd->buf + qpd->pos, space, + "qp sq id %u rq id %u state %u onchip %u\n", + qp->wq.sq.qid, qp->wq.rq.qid, + (int)qp->attr.state, + qp->wq.sq.flags & T4_SQ_ONCHIP); + if (cc < space) + qpd->pos += cc; + return 0; +} + +static int qp_release(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *qpd = file->private_data; + if (!qpd) { + printk(KERN_INFO "%s null qpd?\n", __func__); + return 0; + } + kfree(qpd->buf); + kfree(qpd); + return 0; +} + +static int qp_open(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *qpd; + int ret = 0; + int count = 1; + + qpd = kmalloc(sizeof *qpd, GFP_KERNEL); + if (!qpd) { + ret = -ENOMEM; + goto out; + } + qpd->devp = inode->i_private; + qpd->pos = 0; + + spin_lock_irq(&qpd->devp->lock); + idr_for_each(&qpd->devp->qpidr, count_idrs, &count); + spin_unlock_irq(&qpd->devp->lock); + + qpd->bufsize = count * 128; + qpd->buf = kmalloc(qpd->bufsize, GFP_KERNEL); + if (!qpd->buf) { + ret = -ENOMEM; + goto err1; + } + + spin_lock_irq(&qpd->devp->lock); + idr_for_each(&qpd->devp->qpidr, dump_qp, qpd); + spin_unlock_irq(&qpd->devp->lock); + + qpd->buf[qpd->pos++] = 0; + file->private_data = qpd; + goto out; +err1: + kfree(qpd); +out: + return ret; +} + +static const struct file_operations qp_debugfs_fops = { + .owner = THIS_MODULE, + .open = qp_open, + .release = qp_release, + .read = debugfs_read, + .llseek = default_llseek, +}; + +static int dump_stag(int id, void *p, void *data) +{ + struct c4iw_debugfs_data *stagd = data; + int space; + int cc; + + space = stagd->bufsize - stagd->pos - 1; + if (space == 0) + return 1; + + cc = snprintf(stagd->buf + stagd->pos, space, "0x%x\n", id<<8); + if (cc < space) + stagd->pos += cc; + return 0; +} + +static int stag_release(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *stagd = file->private_data; + if (!stagd) { + printk(KERN_INFO "%s null stagd?\n", __func__); + return 0; + } + kfree(stagd->buf); + kfree(stagd); + return 0; +} + +static int stag_open(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *stagd; + int ret = 0; + int count = 1; + + stagd = kmalloc(sizeof *stagd, GFP_KERNEL); + if (!stagd) { + ret = -ENOMEM; + goto out; + } + stagd->devp = inode->i_private; + stagd->pos = 0; + + spin_lock_irq(&stagd->devp->lock); + idr_for_each(&stagd->devp->mmidr, count_idrs, &count); + spin_unlock_irq(&stagd->devp->lock); + + stagd->bufsize = count * sizeof("0x12345678\n"); + stagd->buf = kmalloc(stagd->bufsize, GFP_KERNEL); + if (!stagd->buf) { + ret = -ENOMEM; + goto err1; + } + + spin_lock_irq(&stagd->devp->lock); + idr_for_each(&stagd->devp->mmidr, dump_stag, stagd); + spin_unlock_irq(&stagd->devp->lock); + + stagd->buf[stagd->pos++] = 0; + file->private_data = stagd; + goto out; +err1: + kfree(stagd); +out: + return ret; +} + +static const struct file_operations stag_debugfs_fops = { + .owner = THIS_MODULE, + .open = stag_open, + .release = stag_release, + .read = debugfs_read, + .llseek = default_llseek, +}; + +static int setup_debugfs(struct c4iw_dev *devp) +{ + struct dentry *de; + + if (!devp->debugfs_root) + return -1; + + de = debugfs_create_file("qps", S_IWUSR, devp->debugfs_root, + (void *)devp, &qp_debugfs_fops); + if (de && de->d_inode) + de->d_inode->i_size = 4096; + + de = debugfs_create_file("stags", S_IWUSR, devp->debugfs_root, + (void *)devp, &stag_debugfs_fops); + if (de && de->d_inode) + de->d_inode->i_size = 4096; + return 0; +} + +void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev, + struct c4iw_dev_ucontext *uctx) +{ + struct list_head *pos, *nxt; + struct c4iw_qid_list *entry; + + mutex_lock(&uctx->lock); + list_for_each_safe(pos, nxt, &uctx->qpids) { + entry = list_entry(pos, struct c4iw_qid_list, entry); + list_del_init(&entry->entry); + if (!(entry->qid & rdev->qpmask)) + c4iw_put_resource(&rdev->resource.qid_fifo, entry->qid, + &rdev->resource.qid_fifo_lock); + kfree(entry); + } + + list_for_each_safe(pos, nxt, &uctx->qpids) { + entry = list_entry(pos, struct c4iw_qid_list, entry); + list_del_init(&entry->entry); + kfree(entry); + } + mutex_unlock(&uctx->lock); +} + +void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev, + struct c4iw_dev_ucontext *uctx) +{ + INIT_LIST_HEAD(&uctx->qpids); + INIT_LIST_HEAD(&uctx->cqids); + mutex_init(&uctx->lock); +} + +/* Caller takes care of locking if needed */ +static int c4iw_rdev_open(struct c4iw_rdev *rdev) +{ + int err; + + c4iw_init_dev_ucontext(rdev, &rdev->uctx); + + /* + * qpshift is the number of bits to shift the qpid left in order + * to get the correct address of the doorbell for that qp. + */ + rdev->qpshift = PAGE_SHIFT - ilog2(rdev->lldi.udb_density); + rdev->qpmask = rdev->lldi.udb_density - 1; + rdev->cqshift = PAGE_SHIFT - ilog2(rdev->lldi.ucq_density); + rdev->cqmask = rdev->lldi.ucq_density - 1; + PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d " + "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x " + "qp qid start %u size %u cq qid start %u size %u\n", + __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start, + rdev->lldi.vr->stag.size, c4iw_num_stags(rdev), + rdev->lldi.vr->pbl.start, + rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start, + rdev->lldi.vr->rq.size, + rdev->lldi.vr->qp.start, + rdev->lldi.vr->qp.size, + rdev->lldi.vr->cq.start, + rdev->lldi.vr->cq.size); + PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu " + "qpmask 0x%x cqshift %lu cqmask 0x%x\n", + (unsigned)pci_resource_len(rdev->lldi.pdev, 2), + (void *)pci_resource_start(rdev->lldi.pdev, 2), + rdev->lldi.db_reg, + rdev->lldi.gts_reg, + rdev->qpshift, rdev->qpmask, + rdev->cqshift, rdev->cqmask); + + if (c4iw_num_stags(rdev) == 0) { + err = -EINVAL; + goto err1; + } + + err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD); + if (err) { + printk(KERN_ERR MOD "error %d initializing resources\n", err); + goto err1; + } + err = c4iw_pblpool_create(rdev); + if (err) { + printk(KERN_ERR MOD "error %d initializing pbl pool\n", err); + goto err2; + } + err = c4iw_rqtpool_create(rdev); + if (err) { + printk(KERN_ERR MOD "error %d initializing rqt pool\n", err); + goto err3; + } + err = c4iw_ocqp_pool_create(rdev); + if (err) { + printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err); + goto err4; + } + return 0; +err4: + c4iw_rqtpool_destroy(rdev); +err3: + c4iw_pblpool_destroy(rdev); +err2: + c4iw_destroy_resource(&rdev->resource); +err1: + return err; +} + +static void c4iw_rdev_close(struct c4iw_rdev *rdev) +{ + c4iw_pblpool_destroy(rdev); + c4iw_rqtpool_destroy(rdev); + c4iw_destroy_resource(&rdev->resource); +} + +struct uld_ctx { + struct list_head entry; + struct cxgb4_lld_info lldi; + struct c4iw_dev *dev; +}; + +static void c4iw_dealloc(struct uld_ctx *ctx) +{ + c4iw_rdev_close(&ctx->dev->rdev); + idr_destroy(&ctx->dev->cqidr); + idr_destroy(&ctx->dev->qpidr); + idr_destroy(&ctx->dev->mmidr); + iounmap(ctx->dev->rdev.oc_mw_kva); + ib_dealloc_device(&ctx->dev->ibdev); + ctx->dev = NULL; +} + +static void c4iw_remove(struct uld_ctx *ctx) +{ + PDBG("%s c4iw_dev %p\n", __func__, ctx->dev); + c4iw_unregister_device(ctx->dev); + c4iw_dealloc(ctx); +} + +static int rdma_supported(const struct cxgb4_lld_info *infop) +{ + return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 && + infop->vr->rq.size > 0 && infop->vr->qp.size > 0 && + infop->vr->cq.size > 0 && infop->vr->ocq.size > 0; +} + +static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) +{ + struct c4iw_dev *devp; + int ret; + + if (!rdma_supported(infop)) { + printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n", + pci_name(infop->pdev)); + return ERR_PTR(-ENOSYS); + } + devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp)); + if (!devp) { + printk(KERN_ERR MOD "Cannot allocate ib device\n"); + return ERR_PTR(-ENOMEM); + } + devp->rdev.lldi = *infop; + + devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) + + (pci_resource_len(devp->rdev.lldi.pdev, 2) - + roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size)); + devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa, + devp->rdev.lldi.vr->ocq.size); + + PDBG(KERN_INFO MOD "ocq memory: " + "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n", + devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size, + devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva); + + ret = c4iw_rdev_open(&devp->rdev); + if (ret) { + printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret); + ib_dealloc_device(&devp->ibdev); + return ERR_PTR(ret); + } + + idr_init(&devp->cqidr); + idr_init(&devp->qpidr); + idr_init(&devp->mmidr); + spin_lock_init(&devp->lock); + + if (c4iw_debugfs_root) { + devp->debugfs_root = debugfs_create_dir( + pci_name(devp->rdev.lldi.pdev), + c4iw_debugfs_root); + setup_debugfs(devp); + } + return devp; +} + +static void *c4iw_uld_add(const struct cxgb4_lld_info *infop) +{ + struct uld_ctx *ctx; + static int vers_printed; + int i; + + if (!vers_printed++) + printk(KERN_INFO MOD "Chelsio T4 RDMA Driver - version %s\n", + DRV_VERSION); + + ctx = kzalloc(sizeof *ctx, GFP_KERNEL); + if (!ctx) { + ctx = ERR_PTR(-ENOMEM); + goto out; + } + ctx->lldi = *infop; + + PDBG("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n", + __func__, pci_name(ctx->lldi.pdev), + ctx->lldi.nchan, ctx->lldi.nrxq, + ctx->lldi.ntxq, ctx->lldi.nports); + + mutex_lock(&dev_mutex); + list_add_tail(&ctx->entry, &uld_ctx_list); + mutex_unlock(&dev_mutex); + + for (i = 0; i < ctx->lldi.nrxq; i++) + PDBG("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]); +out: + return ctx; +} + +static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, + const struct pkt_gl *gl) +{ + struct uld_ctx *ctx = handle; + struct c4iw_dev *dev = ctx->dev; + struct sk_buff *skb; + const struct cpl_act_establish *rpl; + unsigned int opcode; + + if (gl == NULL) { + /* omit RSS and rsp_ctrl at end of descriptor */ + unsigned int len = 64 - sizeof(struct rsp_ctrl) - 8; + + skb = alloc_skb(256, GFP_ATOMIC); + if (!skb) + goto nomem; + __skb_put(skb, len); + skb_copy_to_linear_data(skb, &rsp[1], len); + } else if (gl == CXGB4_MSG_AN) { + const struct rsp_ctrl *rc = (void *)rsp; + + u32 qid = be32_to_cpu(rc->pldbuflen_qid); + c4iw_ev_handler(dev, qid); + return 0; + } else { + skb = cxgb4_pktgl_to_skb(gl, 128, 128); + if (unlikely(!skb)) + goto nomem; + } + + rpl = cplhdr(skb); + opcode = rpl->ot.opcode; + + if (c4iw_handlers[opcode]) + c4iw_handlers[opcode](dev, skb); + else + printk(KERN_INFO "%s no handler opcode 0x%x...\n", __func__, + opcode); + + return 0; +nomem: + return -1; +} + +static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state) +{ + struct uld_ctx *ctx = handle; + + PDBG("%s new_state %u\n", __func__, new_state); + switch (new_state) { + case CXGB4_STATE_UP: + printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev)); + if (!ctx->dev) { + int ret; + + ctx->dev = c4iw_alloc(&ctx->lldi); + if (IS_ERR(ctx->dev)) { + printk(KERN_ERR MOD + "%s: initialization failed: %ld\n", + pci_name(ctx->lldi.pdev), + PTR_ERR(ctx->dev)); + ctx->dev = NULL; + break; + } + ret = c4iw_register_device(ctx->dev); + if (ret) { + printk(KERN_ERR MOD + "%s: RDMA registration failed: %d\n", + pci_name(ctx->lldi.pdev), ret); + c4iw_dealloc(ctx); + } + } + break; + case CXGB4_STATE_DOWN: + printk(KERN_INFO MOD "%s: Down\n", + pci_name(ctx->lldi.pdev)); + if (ctx->dev) + c4iw_remove(ctx); + break; + case CXGB4_STATE_START_RECOVERY: + printk(KERN_INFO MOD "%s: Fatal Error\n", + pci_name(ctx->lldi.pdev)); + if (ctx->dev) { + struct ib_event event; + + ctx->dev->rdev.flags |= T4_FATAL_ERROR; + memset(&event, 0, sizeof event); + event.event = IB_EVENT_DEVICE_FATAL; + event.device = &ctx->dev->ibdev; + ib_dispatch_event(&event); + c4iw_remove(ctx); + } + break; + case CXGB4_STATE_DETACH: + printk(KERN_INFO MOD "%s: Detach\n", + pci_name(ctx->lldi.pdev)); + if (ctx->dev) + c4iw_remove(ctx); + break; + } + return 0; +} + +static struct cxgb4_uld_info c4iw_uld_info = { + .name = DRV_NAME, + .add = c4iw_uld_add, + .rx_handler = c4iw_uld_rx_handler, + .state_change = c4iw_uld_state_change, +}; + +static int __init c4iw_init_module(void) +{ + int err; + + err = c4iw_cm_init(); + if (err) + return err; + + c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL); + if (!c4iw_debugfs_root) + printk(KERN_WARNING MOD + "could not create debugfs entry, continuing\n"); + + cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info); + + return 0; +} + +static void __exit c4iw_exit_module(void) +{ + struct uld_ctx *ctx, *tmp; + + mutex_lock(&dev_mutex); + list_for_each_entry_safe(ctx, tmp, &uld_ctx_list, entry) { + if (ctx->dev) + c4iw_remove(ctx); + kfree(ctx); + } + mutex_unlock(&dev_mutex); + cxgb4_unregister_uld(CXGB4_ULD_RDMA); + c4iw_cm_term(); + debugfs_remove_recursive(c4iw_debugfs_root); +} + +module_init(c4iw_init_module); +module_exit(c4iw_exit_module); diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c new file mode 100644 index 00000000..397cb36c --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include + +#include "iw_cxgb4.h" + +static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp, + struct c4iw_qp *qhp, + struct t4_cqe *err_cqe, + enum ib_event_type ib_event) +{ + struct ib_event event; + struct c4iw_qp_attributes attrs; + unsigned long flag; + + if ((qhp->attr.state == C4IW_QP_STATE_ERROR) || + (qhp->attr.state == C4IW_QP_STATE_TERMINATE)) { + PDBG("%s AE received after RTS - " + "qp state %d qpid 0x%x status 0x%x\n", __func__, + qhp->attr.state, qhp->wq.sq.qid, CQE_STATUS(err_cqe)); + return; + } + + printk(KERN_ERR MOD "AE qpid 0x%x opcode %d status 0x%x " + "type %d wrid.hi 0x%x wrid.lo 0x%x\n", + CQE_QPID(err_cqe), CQE_OPCODE(err_cqe), + CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), + CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); + + if (qhp->attr.state == C4IW_QP_STATE_RTS) { + attrs.next_state = C4IW_QP_STATE_TERMINATE; + c4iw_modify_qp(qhp->rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, + &attrs, 0); + } + + event.event = ib_event; + event.device = chp->ibcq.device; + if (ib_event == IB_EVENT_CQ_ERR) + event.element.cq = &chp->ibcq; + else + event.element.qp = &qhp->ibqp; + if (qhp->ibqp.event_handler) + (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); + + spin_lock_irqsave(&chp->comp_handler_lock, flag); + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); +} + +void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) +{ + struct c4iw_cq *chp; + struct c4iw_qp *qhp; + u32 cqid; + + spin_lock(&dev->lock); + qhp = get_qhp(dev, CQE_QPID(err_cqe)); + if (!qhp) { + printk(KERN_ERR MOD "BAD AE qpid 0x%x opcode %d " + "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", + CQE_QPID(err_cqe), + CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), + CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), + CQE_WRID_LOW(err_cqe)); + spin_unlock(&dev->lock); + goto out; + } + + if (SQ_TYPE(err_cqe)) + cqid = qhp->attr.scq; + else + cqid = qhp->attr.rcq; + chp = get_chp(dev, cqid); + if (!chp) { + printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d " + "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", + cqid, CQE_QPID(err_cqe), + CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), + CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), + CQE_WRID_LOW(err_cqe)); + spin_unlock(&dev->lock); + goto out; + } + + c4iw_qp_add_ref(&qhp->ibqp); + atomic_inc(&chp->refcnt); + spin_unlock(&dev->lock); + + /* Bad incoming write */ + if (RQ_TYPE(err_cqe) && + (CQE_OPCODE(err_cqe) == FW_RI_RDMA_WRITE)) { + post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_REQ_ERR); + goto done; + } + + switch (CQE_STATUS(err_cqe)) { + + /* Completion Events */ + case T4_ERR_SUCCESS: + printk(KERN_ERR MOD "AE with status 0!\n"); + break; + + case T4_ERR_STAG: + case T4_ERR_PDID: + case T4_ERR_QPID: + case T4_ERR_ACCESS: + case T4_ERR_WRAP: + case T4_ERR_BOUND: + case T4_ERR_INVALIDATE_SHARED_MR: + case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND: + post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_ACCESS_ERR); + break; + + /* Device Fatal Errors */ + case T4_ERR_ECC: + case T4_ERR_ECC_PSTAG: + case T4_ERR_INTERNAL_ERR: + post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_DEVICE_FATAL); + break; + + /* QP Fatal Errors */ + case T4_ERR_OUT_OF_RQE: + case T4_ERR_PBL_ADDR_BOUND: + case T4_ERR_CRC: + case T4_ERR_MARKER: + case T4_ERR_PDU_LEN_ERR: + case T4_ERR_DDP_VERSION: + case T4_ERR_RDMA_VERSION: + case T4_ERR_OPCODE: + case T4_ERR_DDP_QUEUE_NUM: + case T4_ERR_MSN: + case T4_ERR_TBIT: + case T4_ERR_MO: + case T4_ERR_MSN_GAP: + case T4_ERR_MSN_RANGE: + case T4_ERR_RQE_ADDR_BOUND: + case T4_ERR_IRD_OVERFLOW: + post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_FATAL); + break; + + default: + printk(KERN_ERR MOD "Unknown T4 status 0x%x QPID 0x%x\n", + CQE_STATUS(err_cqe), qhp->wq.sq.qid); + post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_FATAL); + break; + } +done: + if (atomic_dec_and_test(&chp->refcnt)) + wake_up(&chp->wait); + c4iw_qp_rem_ref(&qhp->ibqp); +out: + return; +} + +int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) +{ + struct c4iw_cq *chp; + unsigned long flag; + + chp = get_chp(dev, qid); + if (chp) { + spin_lock_irqsave(&chp->comp_handler_lock, flag); + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); + } else + PDBG("%s unknown cqid 0x%x\n", __func__, qid); + return 0; +} diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h new file mode 100644 index 00000000..1357c5bf --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -0,0 +1,801 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __IW_CXGB4_H__ +#define __IW_CXGB4_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +#include "cxgb4.h" +#include "cxgb4_uld.h" +#include "l2t.h" +#include "user.h" + +#define DRV_NAME "iw_cxgb4" +#define MOD DRV_NAME ":" + +extern int c4iw_debug; +#define PDBG(fmt, args...) \ +do { \ + if (c4iw_debug) \ + printk(MOD fmt, ## args); \ +} while (0) + +#include "t4.h" + +#define PBL_OFF(rdev_p, a) ((a) - (rdev_p)->lldi.vr->pbl.start) +#define RQT_OFF(rdev_p, a) ((a) - (rdev_p)->lldi.vr->rq.start) + +static inline void *cplhdr(struct sk_buff *skb) +{ + return skb->data; +} + +struct c4iw_resource { + struct kfifo tpt_fifo; + spinlock_t tpt_fifo_lock; + struct kfifo qid_fifo; + spinlock_t qid_fifo_lock; + struct kfifo pdid_fifo; + spinlock_t pdid_fifo_lock; +}; + +struct c4iw_qid_list { + struct list_head entry; + u32 qid; +}; + +struct c4iw_dev_ucontext { + struct list_head qpids; + struct list_head cqids; + struct mutex lock; +}; + +enum c4iw_rdev_flags { + T4_FATAL_ERROR = (1<<0), +}; + +struct c4iw_rdev { + struct c4iw_resource resource; + unsigned long qpshift; + u32 qpmask; + unsigned long cqshift; + u32 cqmask; + struct c4iw_dev_ucontext uctx; + struct gen_pool *pbl_pool; + struct gen_pool *rqt_pool; + struct gen_pool *ocqp_pool; + u32 flags; + struct cxgb4_lld_info lldi; + unsigned long oc_mw_pa; + void __iomem *oc_mw_kva; +}; + +static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) +{ + return rdev->flags & T4_FATAL_ERROR; +} + +static inline int c4iw_num_stags(struct c4iw_rdev *rdev) +{ + return min((int)T4_MAX_NUM_STAG, (int)(rdev->lldi.vr->stag.size >> 5)); +} + +#define C4IW_WR_TO (10*HZ) + +struct c4iw_wr_wait { + struct completion completion; + int ret; +}; + +static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp) +{ + wr_waitp->ret = 0; + init_completion(&wr_waitp->completion); +} + +static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret) +{ + wr_waitp->ret = ret; + complete(&wr_waitp->completion); +} + +static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, + struct c4iw_wr_wait *wr_waitp, + u32 hwtid, u32 qpid, + const char *func) +{ + unsigned to = C4IW_WR_TO; + int ret; + + do { + ret = wait_for_completion_timeout(&wr_waitp->completion, to); + if (!ret) { + printk(KERN_ERR MOD "%s - Device %s not responding - " + "tid %u qpid %u\n", func, + pci_name(rdev->lldi.pdev), hwtid, qpid); + if (c4iw_fatal_error(rdev)) { + wr_waitp->ret = -EIO; + break; + } + to = to << 2; + } + } while (!ret); + if (wr_waitp->ret) + PDBG("%s: FW reply %d tid %u qpid %u\n", + pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid); + return wr_waitp->ret; +} + +struct c4iw_dev { + struct ib_device ibdev; + struct c4iw_rdev rdev; + u32 device_cap_flags; + struct idr cqidr; + struct idr qpidr; + struct idr mmidr; + spinlock_t lock; + struct dentry *debugfs_root; +}; + +static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct c4iw_dev, ibdev); +} + +static inline struct c4iw_dev *rdev_to_c4iw_dev(struct c4iw_rdev *rdev) +{ + return container_of(rdev, struct c4iw_dev, rdev); +} + +static inline struct c4iw_cq *get_chp(struct c4iw_dev *rhp, u32 cqid) +{ + return idr_find(&rhp->cqidr, cqid); +} + +static inline struct c4iw_qp *get_qhp(struct c4iw_dev *rhp, u32 qpid) +{ + return idr_find(&rhp->qpidr, qpid); +} + +static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid) +{ + return idr_find(&rhp->mmidr, mmid); +} + +static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr, + void *handle, u32 id) +{ + int ret; + int newid; + + do { + if (!idr_pre_get(idr, GFP_KERNEL)) + return -ENOMEM; + spin_lock_irq(&rhp->lock); + ret = idr_get_new_above(idr, handle, id, &newid); + BUG_ON(newid != id); + spin_unlock_irq(&rhp->lock); + } while (ret == -EAGAIN); + + return ret; +} + +static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id) +{ + spin_lock_irq(&rhp->lock); + idr_remove(idr, id); + spin_unlock_irq(&rhp->lock); +} + +struct c4iw_pd { + struct ib_pd ibpd; + u32 pdid; + struct c4iw_dev *rhp; +}; + +static inline struct c4iw_pd *to_c4iw_pd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct c4iw_pd, ibpd); +} + +struct tpt_attributes { + u64 len; + u64 va_fbo; + enum fw_ri_mem_perms perms; + u32 stag; + u32 pdid; + u32 qpid; + u32 pbl_addr; + u32 pbl_size; + u32 state:1; + u32 type:2; + u32 rsvd:1; + u32 remote_invaliate_disable:1; + u32 zbva:1; + u32 mw_bind_enable:1; + u32 page_size:5; +}; + +struct c4iw_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + struct c4iw_dev *rhp; + u64 kva; + struct tpt_attributes attr; +}; + +static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct c4iw_mr, ibmr); +} + +struct c4iw_mw { + struct ib_mw ibmw; + struct c4iw_dev *rhp; + u64 kva; + struct tpt_attributes attr; +}; + +static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct c4iw_mw, ibmw); +} + +struct c4iw_fr_page_list { + struct ib_fast_reg_page_list ibpl; + DEFINE_DMA_UNMAP_ADDR(mapping); + dma_addr_t dma_addr; + struct c4iw_dev *dev; + int size; +}; + +static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list( + struct ib_fast_reg_page_list *ibpl) +{ + return container_of(ibpl, struct c4iw_fr_page_list, ibpl); +} + +struct c4iw_cq { + struct ib_cq ibcq; + struct c4iw_dev *rhp; + struct t4_cq cq; + spinlock_t lock; + spinlock_t comp_handler_lock; + atomic_t refcnt; + wait_queue_head_t wait; +}; + +static inline struct c4iw_cq *to_c4iw_cq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct c4iw_cq, ibcq); +} + +struct c4iw_mpa_attributes { + u8 initiator; + u8 recv_marker_enabled; + u8 xmit_marker_enabled; + u8 crc_enabled; + u8 enhanced_rdma_conn; + u8 version; + u8 p2p_type; +}; + +struct c4iw_qp_attributes { + u32 scq; + u32 rcq; + u32 sq_num_entries; + u32 rq_num_entries; + u32 sq_max_sges; + u32 sq_max_sges_rdma_write; + u32 rq_max_sges; + u32 state; + u8 enable_rdma_read; + u8 enable_rdma_write; + u8 enable_bind; + u8 enable_mmid0_fastreg; + u32 max_ord; + u32 max_ird; + u32 pd; + u32 next_state; + char terminate_buffer[52]; + u32 terminate_msg_len; + u8 is_terminate_local; + struct c4iw_mpa_attributes mpa_attr; + struct c4iw_ep *llp_stream_handle; + u8 layer_etype; + u8 ecode; +}; + +struct c4iw_qp { + struct ib_qp ibqp; + struct c4iw_dev *rhp; + struct c4iw_ep *ep; + struct c4iw_qp_attributes attr; + struct t4_wq wq; + spinlock_t lock; + struct mutex mutex; + atomic_t refcnt; + wait_queue_head_t wait; + struct timer_list timer; +}; + +static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct c4iw_qp, ibqp); +} + +struct c4iw_ucontext { + struct ib_ucontext ibucontext; + struct c4iw_dev_ucontext uctx; + u32 key; + spinlock_t mmap_lock; + struct list_head mmaps; +}; + +static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) +{ + return container_of(c, struct c4iw_ucontext, ibucontext); +} + +struct c4iw_mm_entry { + struct list_head entry; + u64 addr; + u32 key; + unsigned len; +}; + +static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext, + u32 key, unsigned len) +{ + struct list_head *pos, *nxt; + struct c4iw_mm_entry *mm; + + spin_lock(&ucontext->mmap_lock); + list_for_each_safe(pos, nxt, &ucontext->mmaps) { + + mm = list_entry(pos, struct c4iw_mm_entry, entry); + if (mm->key == key && mm->len == len) { + list_del_init(&mm->entry); + spin_unlock(&ucontext->mmap_lock); + PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__, + key, (unsigned long long) mm->addr, mm->len); + return mm; + } + } + spin_unlock(&ucontext->mmap_lock); + return NULL; +} + +static inline void insert_mmap(struct c4iw_ucontext *ucontext, + struct c4iw_mm_entry *mm) +{ + spin_lock(&ucontext->mmap_lock); + PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__, + mm->key, (unsigned long long) mm->addr, mm->len); + list_add_tail(&mm->entry, &ucontext->mmaps); + spin_unlock(&ucontext->mmap_lock); +} + +enum c4iw_qp_attr_mask { + C4IW_QP_ATTR_NEXT_STATE = 1 << 0, + C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7, + C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8, + C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9, + C4IW_QP_ATTR_MAX_ORD = 1 << 11, + C4IW_QP_ATTR_MAX_IRD = 1 << 12, + C4IW_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22, + C4IW_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23, + C4IW_QP_ATTR_MPA_ATTR = 1 << 24, + C4IW_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25, + C4IW_QP_ATTR_VALID_MODIFY = (C4IW_QP_ATTR_ENABLE_RDMA_READ | + C4IW_QP_ATTR_ENABLE_RDMA_WRITE | + C4IW_QP_ATTR_MAX_ORD | + C4IW_QP_ATTR_MAX_IRD | + C4IW_QP_ATTR_LLP_STREAM_HANDLE | + C4IW_QP_ATTR_STREAM_MSG_BUFFER | + C4IW_QP_ATTR_MPA_ATTR | + C4IW_QP_ATTR_QP_CONTEXT_ACTIVATE) +}; + +int c4iw_modify_qp(struct c4iw_dev *rhp, + struct c4iw_qp *qhp, + enum c4iw_qp_attr_mask mask, + struct c4iw_qp_attributes *attrs, + int internal); + +enum c4iw_qp_state { + C4IW_QP_STATE_IDLE, + C4IW_QP_STATE_RTS, + C4IW_QP_STATE_ERROR, + C4IW_QP_STATE_TERMINATE, + C4IW_QP_STATE_CLOSING, + C4IW_QP_STATE_TOT +}; + +static inline int c4iw_convert_state(enum ib_qp_state ib_state) +{ + switch (ib_state) { + case IB_QPS_RESET: + case IB_QPS_INIT: + return C4IW_QP_STATE_IDLE; + case IB_QPS_RTS: + return C4IW_QP_STATE_RTS; + case IB_QPS_SQD: + return C4IW_QP_STATE_CLOSING; + case IB_QPS_SQE: + return C4IW_QP_STATE_TERMINATE; + case IB_QPS_ERR: + return C4IW_QP_STATE_ERROR; + default: + return -1; + } +} + +static inline u32 c4iw_ib_to_tpt_access(int a) +{ + return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | + (a & IB_ACCESS_REMOTE_READ ? FW_RI_MEM_ACCESS_REM_READ : 0) | + (a & IB_ACCESS_LOCAL_WRITE ? FW_RI_MEM_ACCESS_LOCAL_WRITE : 0) | + FW_RI_MEM_ACCESS_LOCAL_READ; +} + +static inline u32 c4iw_ib_to_tpt_bind_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? FW_RI_MEM_ACCESS_REM_READ : 0); +} + +enum c4iw_mmid_state { + C4IW_STAG_STATE_VALID, + C4IW_STAG_STATE_INVALID +}; + +#define C4IW_NODE_DESC "cxgb4 Chelsio Communications" + +#define MPA_KEY_REQ "MPA ID Req Frame" +#define MPA_KEY_REP "MPA ID Rep Frame" + +#define MPA_MAX_PRIVATE_DATA 256 +#define MPA_ENHANCED_RDMA_CONN 0x10 +#define MPA_REJECT 0x20 +#define MPA_CRC 0x40 +#define MPA_MARKERS 0x80 +#define MPA_FLAGS_MASK 0xE0 + +#define MPA_V2_PEER2PEER_MODEL 0x8000 +#define MPA_V2_ZERO_LEN_FPDU_RTR 0x4000 +#define MPA_V2_RDMA_WRITE_RTR 0x8000 +#define MPA_V2_RDMA_READ_RTR 0x4000 +#define MPA_V2_IRD_ORD_MASK 0x3FFF + +#define c4iw_put_ep(ep) { \ + PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ + ep, atomic_read(&((ep)->kref.refcount))); \ + WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \ + kref_put(&((ep)->kref), _c4iw_free_ep); \ +} + +#define c4iw_get_ep(ep) { \ + PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \ + ep, atomic_read(&((ep)->kref.refcount))); \ + kref_get(&((ep)->kref)); \ +} +void _c4iw_free_ep(struct kref *kref); + +struct mpa_message { + u8 key[16]; + u8 flags; + u8 revision; + __be16 private_data_size; + u8 private_data[0]; +}; + +struct mpa_v2_conn_params { + __be16 ird; + __be16 ord; +}; + +struct terminate_message { + u8 layer_etype; + u8 ecode; + __be16 hdrct_rsvd; + u8 len_hdrs[0]; +}; + +#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28) + +enum c4iw_layers_types { + LAYER_RDMAP = 0x00, + LAYER_DDP = 0x10, + LAYER_MPA = 0x20, + RDMAP_LOCAL_CATA = 0x00, + RDMAP_REMOTE_PROT = 0x01, + RDMAP_REMOTE_OP = 0x02, + DDP_LOCAL_CATA = 0x00, + DDP_TAGGED_ERR = 0x01, + DDP_UNTAGGED_ERR = 0x02, + DDP_LLP = 0x03 +}; + +enum c4iw_rdma_ecodes { + RDMAP_INV_STAG = 0x00, + RDMAP_BASE_BOUNDS = 0x01, + RDMAP_ACC_VIOL = 0x02, + RDMAP_STAG_NOT_ASSOC = 0x03, + RDMAP_TO_WRAP = 0x04, + RDMAP_INV_VERS = 0x05, + RDMAP_INV_OPCODE = 0x06, + RDMAP_STREAM_CATA = 0x07, + RDMAP_GLOBAL_CATA = 0x08, + RDMAP_CANT_INV_STAG = 0x09, + RDMAP_UNSPECIFIED = 0xff +}; + +enum c4iw_ddp_ecodes { + DDPT_INV_STAG = 0x00, + DDPT_BASE_BOUNDS = 0x01, + DDPT_STAG_NOT_ASSOC = 0x02, + DDPT_TO_WRAP = 0x03, + DDPT_INV_VERS = 0x04, + DDPU_INV_QN = 0x01, + DDPU_INV_MSN_NOBUF = 0x02, + DDPU_INV_MSN_RANGE = 0x03, + DDPU_INV_MO = 0x04, + DDPU_MSG_TOOBIG = 0x05, + DDPU_INV_VERS = 0x06 +}; + +enum c4iw_mpa_ecodes { + MPA_CRC_ERR = 0x02, + MPA_MARKER_ERR = 0x03, + MPA_LOCAL_CATA = 0x05, + MPA_INSUFF_IRD = 0x06, + MPA_NOMATCH_RTR = 0x07, +}; + +enum c4iw_ep_state { + IDLE = 0, + LISTEN, + CONNECTING, + MPA_REQ_WAIT, + MPA_REQ_SENT, + MPA_REQ_RCVD, + MPA_REP_SENT, + FPDU_MODE, + ABORTING, + CLOSING, + MORIBUND, + DEAD, +}; + +enum c4iw_ep_flags { + PEER_ABORT_IN_PROGRESS = 0, + ABORT_REQ_IN_PROGRESS = 1, + RELEASE_RESOURCES = 2, + CLOSE_SENT = 3, +}; + +struct c4iw_ep_common { + struct iw_cm_id *cm_id; + struct c4iw_qp *qp; + struct c4iw_dev *dev; + enum c4iw_ep_state state; + struct kref kref; + struct mutex mutex; + struct sockaddr_in local_addr; + struct sockaddr_in remote_addr; + struct c4iw_wr_wait wr_wait; + unsigned long flags; +}; + +struct c4iw_listen_ep { + struct c4iw_ep_common com; + unsigned int stid; + int backlog; +}; + +struct c4iw_ep { + struct c4iw_ep_common com; + struct c4iw_ep *parent_ep; + struct timer_list timer; + struct list_head entry; + unsigned int atid; + u32 hwtid; + u32 snd_seq; + u32 rcv_seq; + struct l2t_entry *l2t; + struct dst_entry *dst; + struct sk_buff *mpa_skb; + struct c4iw_mpa_attributes mpa_attr; + u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA]; + unsigned int mpa_pkt_len; + u32 ird; + u32 ord; + u32 smac_idx; + u32 tx_chan; + u32 mtu; + u16 mss; + u16 emss; + u16 plen; + u16 rss_qid; + u16 txq_idx; + u16 ctrlq_idx; + u8 tos; + u8 retry_with_mpa_v1; + u8 tried_with_mpa_v1; +}; + +static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) +{ + return cm_id->provider_data; +} + +static inline struct c4iw_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) +{ + return cm_id->provider_data; +} + +static inline int compute_wscale(int win) +{ + int wscale = 0; + + while (wscale < 14 && (65535< +#include + +#include "iw_cxgb4.h" + +#define T4_ULPTX_MIN_IO 32 +#define C4IW_MAX_INLINE_SIZE 96 + +static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, + void *data) +{ + struct sk_buff *skb; + struct ulp_mem_io *req; + struct ulptx_idata *sc; + u8 wr_len, *to_dp, *from_dp; + int copy_len, num_wqe, i, ret = 0; + struct c4iw_wr_wait wr_wait; + + addr &= 0x7FFFFFF; + PDBG("%s addr 0x%x len %u\n", __func__, addr, len); + num_wqe = DIV_ROUND_UP(len, C4IW_MAX_INLINE_SIZE); + c4iw_init_wr_wait(&wr_wait); + for (i = 0; i < num_wqe; i++) { + + copy_len = len > C4IW_MAX_INLINE_SIZE ? C4IW_MAX_INLINE_SIZE : + len; + wr_len = roundup(sizeof *req + sizeof *sc + + roundup(copy_len, T4_ULPTX_MIN_IO), 16); + + skb = alloc_skb(wr_len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + req = (struct ulp_mem_io *)__skb_put(skb, wr_len); + memset(req, 0, wr_len); + INIT_ULPTX_WR(req, wr_len, 0, 0); + + if (i == (num_wqe-1)) { + req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) | + FW_WR_COMPL(1)); + req->wr.wr_lo = (__force __be64)(unsigned long) &wr_wait; + } else + req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR)); + req->wr.wr_mid = cpu_to_be32( + FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16))); + + req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE) | (1<<23)); + req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN( + DIV_ROUND_UP(copy_len, T4_ULPTX_MIN_IO))); + req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), + 16)); + req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR(addr + i * 3)); + + sc = (struct ulptx_idata *)(req + 1); + sc->cmd_more = cpu_to_be32(ULPTX_CMD(ULP_TX_SC_IMM)); + sc->len = cpu_to_be32(roundup(copy_len, T4_ULPTX_MIN_IO)); + + to_dp = (u8 *)(sc + 1); + from_dp = (u8 *)data + i * C4IW_MAX_INLINE_SIZE; + if (data) + memcpy(to_dp, from_dp, copy_len); + else + memset(to_dp, 0, copy_len); + if (copy_len % T4_ULPTX_MIN_IO) + memset(to_dp + copy_len, 0, T4_ULPTX_MIN_IO - + (copy_len % T4_ULPTX_MIN_IO)); + ret = c4iw_ofld_send(rdev, skb); + if (ret) + return ret; + len -= C4IW_MAX_INLINE_SIZE; + } + + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + return ret; +} + +/* + * Build and write a TPT entry. + * IN: stag key, pdid, perm, bind_enabled, zbva, to, len, page_size, + * pbl_size and pbl_addr + * OUT: stag index + */ +static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, + u32 *stag, u8 stag_state, u32 pdid, + enum fw_ri_stag_type type, enum fw_ri_mem_perms perm, + int bind_enabled, u32 zbva, u64 to, + u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr) +{ + int err; + struct fw_ri_tpte tpt; + u32 stag_idx; + static atomic_t key; + + if (c4iw_fatal_error(rdev)) + return -EIO; + + stag_state = stag_state > 0; + stag_idx = (*stag) >> 8; + + if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) { + stag_idx = c4iw_get_resource(&rdev->resource.tpt_fifo, + &rdev->resource.tpt_fifo_lock); + if (!stag_idx) + return -ENOMEM; + *stag = (stag_idx << 8) | (atomic_inc_return(&key) & 0xff); + } + PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", + __func__, stag_state, type, pdid, stag_idx); + + /* write TPT entry */ + if (reset_tpt_entry) + memset(&tpt, 0, sizeof(tpt)); + else { + tpt.valid_to_pdid = cpu_to_be32(F_FW_RI_TPTE_VALID | + V_FW_RI_TPTE_STAGKEY((*stag & M_FW_RI_TPTE_STAGKEY)) | + V_FW_RI_TPTE_STAGSTATE(stag_state) | + V_FW_RI_TPTE_STAGTYPE(type) | V_FW_RI_TPTE_PDID(pdid)); + tpt.locread_to_qpid = cpu_to_be32(V_FW_RI_TPTE_PERM(perm) | + (bind_enabled ? F_FW_RI_TPTE_MWBINDEN : 0) | + V_FW_RI_TPTE_ADDRTYPE((zbva ? FW_RI_ZERO_BASED_TO : + FW_RI_VA_BASED_TO))| + V_FW_RI_TPTE_PS(page_size)); + tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( + V_FW_RI_TPTE_PBLADDR(PBL_OFF(rdev, pbl_addr)>>3)); + tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); + tpt.va_hi = cpu_to_be32((u32)(to >> 32)); + tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); + tpt.dca_mwbcnt_pstag = cpu_to_be32(0); + tpt.len_hi = cpu_to_be32((u32)(len >> 32)); + } + err = write_adapter_mem(rdev, stag_idx + + (rdev->lldi.vr->stag.start >> 5), + sizeof(tpt), &tpt); + + if (reset_tpt_entry) + c4iw_put_resource(&rdev->resource.tpt_fifo, stag_idx, + &rdev->resource.tpt_fifo_lock); + return err; +} + +static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl, + u32 pbl_addr, u32 pbl_size) +{ + int err; + + PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", + __func__, pbl_addr, rdev->lldi.vr->pbl.start, + pbl_size); + + err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl); + return err; +} + +static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size, + u32 pbl_addr) +{ + return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, + pbl_size, pbl_addr); +} + +static int allocate_window(struct c4iw_rdev *rdev, u32 * stag, u32 pdid) +{ + *stag = T4_STAG_UNSET; + return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0, + 0UL, 0, 0, 0, 0); +} + +static int deallocate_window(struct c4iw_rdev *rdev, u32 stag) +{ + return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0, + 0); +} + +static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, + u32 pbl_size, u32 pbl_addr) +{ + *stag = T4_STAG_UNSET; + return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_NSMR, 0, 0, 0, + 0UL, 0, 0, pbl_size, pbl_addr); +} + +static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) +{ + u32 mmid; + + mhp->attr.state = 1; + mhp->attr.stag = stag; + mmid = stag >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); + return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); +} + +static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php, + struct c4iw_mr *mhp, int shift) +{ + u32 stag = T4_STAG_UNSET; + int ret; + + ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid, + FW_RI_STAG_NSMR, mhp->attr.perms, + mhp->attr.mw_bind_enable, mhp->attr.zbva, + mhp->attr.va_fbo, mhp->attr.len, shift - 12, + mhp->attr.pbl_size, mhp->attr.pbl_addr); + if (ret) + return ret; + + ret = finish_mem_reg(mhp, stag); + if (ret) + dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); + return ret; +} + +static int reregister_mem(struct c4iw_dev *rhp, struct c4iw_pd *php, + struct c4iw_mr *mhp, int shift, int npages) +{ + u32 stag; + int ret; + + if (npages > mhp->attr.pbl_size) + return -ENOMEM; + + stag = mhp->attr.stag; + ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid, + FW_RI_STAG_NSMR, mhp->attr.perms, + mhp->attr.mw_bind_enable, mhp->attr.zbva, + mhp->attr.va_fbo, mhp->attr.len, shift - 12, + mhp->attr.pbl_size, mhp->attr.pbl_addr); + if (ret) + return ret; + + ret = finish_mem_reg(mhp, stag); + if (ret) + dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); + + return ret; +} + +static int alloc_pbl(struct c4iw_mr *mhp, int npages) +{ + mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev, + npages << 3); + + if (!mhp->attr.pbl_addr) + return -ENOMEM; + + mhp->attr.pbl_size = npages; + + return 0; +} + +static int build_phys_page_list(struct ib_phys_buf *buffer_list, + int num_phys_buf, u64 *iova_start, + u64 *total_size, int *npages, + int *shift, __be64 **page_list) +{ + u64 mask; + int i, j, n; + + mask = 0; + *total_size = 0; + for (i = 0; i < num_phys_buf; ++i) { + if (i != 0 && buffer_list[i].addr & ~PAGE_MASK) + return -EINVAL; + if (i != 0 && i != num_phys_buf - 1 && + (buffer_list[i].size & ~PAGE_MASK)) + return -EINVAL; + *total_size += buffer_list[i].size; + if (i > 0) + mask |= buffer_list[i].addr; + else + mask |= buffer_list[i].addr & PAGE_MASK; + if (i != num_phys_buf - 1) + mask |= buffer_list[i].addr + buffer_list[i].size; + else + mask |= (buffer_list[i].addr + buffer_list[i].size + + PAGE_SIZE - 1) & PAGE_MASK; + } + + if (*total_size > 0xFFFFFFFFULL) + return -ENOMEM; + + /* Find largest page shift we can use to cover buffers */ + for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift)) + if ((1ULL << *shift) & mask) + break; + + buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1); + buffer_list[0].addr &= ~0ull << *shift; + + *npages = 0; + for (i = 0; i < num_phys_buf; ++i) + *npages += (buffer_list[i].size + + (1ULL << *shift) - 1) >> *shift; + + if (!*npages) + return -EINVAL; + + *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL); + if (!*page_list) + return -ENOMEM; + + n = 0; + for (i = 0; i < num_phys_buf; ++i) + for (j = 0; + j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift; + ++j) + (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr + + ((u64) j << *shift)); + + PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n", + __func__, (unsigned long long)*iova_start, + (unsigned long long)mask, *shift, (unsigned long long)*total_size, + *npages); + + return 0; + +} + +int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask, + struct ib_pd *pd, struct ib_phys_buf *buffer_list, + int num_phys_buf, int acc, u64 *iova_start) +{ + + struct c4iw_mr mh, *mhp; + struct c4iw_pd *php; + struct c4iw_dev *rhp; + __be64 *page_list = NULL; + int shift = 0; + u64 total_size; + int npages; + int ret; + + PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd); + + /* There can be no memory windows */ + if (atomic_read(&mr->usecnt)) + return -EINVAL; + + mhp = to_c4iw_mr(mr); + rhp = mhp->rhp; + php = to_c4iw_pd(mr->pd); + + /* make sure we are on the same adapter */ + if (rhp != php->rhp) + return -EINVAL; + + memcpy(&mh, mhp, sizeof *mhp); + + if (mr_rereg_mask & IB_MR_REREG_PD) + php = to_c4iw_pd(pd); + if (mr_rereg_mask & IB_MR_REREG_ACCESS) { + mh.attr.perms = c4iw_ib_to_tpt_access(acc); + mh.attr.mw_bind_enable = (acc & IB_ACCESS_MW_BIND) == + IB_ACCESS_MW_BIND; + } + if (mr_rereg_mask & IB_MR_REREG_TRANS) { + ret = build_phys_page_list(buffer_list, num_phys_buf, + iova_start, + &total_size, &npages, + &shift, &page_list); + if (ret) + return ret; + } + + ret = reregister_mem(rhp, php, &mh, shift, npages); + kfree(page_list); + if (ret) + return ret; + if (mr_rereg_mask & IB_MR_REREG_PD) + mhp->attr.pdid = php->pdid; + if (mr_rereg_mask & IB_MR_REREG_ACCESS) + mhp->attr.perms = c4iw_ib_to_tpt_access(acc); + if (mr_rereg_mask & IB_MR_REREG_TRANS) { + mhp->attr.zbva = 0; + mhp->attr.va_fbo = *iova_start; + mhp->attr.page_size = shift - 12; + mhp->attr.len = (u32) total_size; + mhp->attr.pbl_size = npages; + } + + return 0; +} + +struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, int acc, u64 *iova_start) +{ + __be64 *page_list; + int shift; + u64 total_size; + int npages; + struct c4iw_dev *rhp; + struct c4iw_pd *php; + struct c4iw_mr *mhp; + int ret; + + PDBG("%s ib_pd %p\n", __func__, pd); + php = to_c4iw_pd(pd); + rhp = php->rhp; + + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + return ERR_PTR(-ENOMEM); + + mhp->rhp = rhp; + + /* First check that we have enough alignment */ + if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) { + ret = -EINVAL; + goto err; + } + + if (num_phys_buf > 1 && + ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) { + ret = -EINVAL; + goto err; + } + + ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start, + &total_size, &npages, &shift, + &page_list); + if (ret) + goto err; + + ret = alloc_pbl(mhp, npages); + if (ret) { + kfree(page_list); + goto err_pbl; + } + + ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr, + npages); + kfree(page_list); + if (ret) + goto err_pbl; + + mhp->attr.pdid = php->pdid; + mhp->attr.zbva = 0; + + mhp->attr.perms = c4iw_ib_to_tpt_access(acc); + mhp->attr.va_fbo = *iova_start; + mhp->attr.page_size = shift - 12; + + mhp->attr.len = (u32) total_size; + mhp->attr.pbl_size = npages; + ret = register_mem(rhp, php, mhp, shift); + if (ret) + goto err_pbl; + + return &mhp->ibmr; + +err_pbl: + c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, + mhp->attr.pbl_size << 3); + +err: + kfree(mhp); + return ERR_PTR(ret); + +} + +struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct c4iw_dev *rhp; + struct c4iw_pd *php; + struct c4iw_mr *mhp; + int ret; + u32 stag = T4_STAG_UNSET; + + PDBG("%s ib_pd %p\n", __func__, pd); + php = to_c4iw_pd(pd); + rhp = php->rhp; + + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + return ERR_PTR(-ENOMEM); + + mhp->rhp = rhp; + mhp->attr.pdid = php->pdid; + mhp->attr.perms = c4iw_ib_to_tpt_access(acc); + mhp->attr.mw_bind_enable = (acc&IB_ACCESS_MW_BIND) == IB_ACCESS_MW_BIND; + mhp->attr.zbva = 0; + mhp->attr.va_fbo = 0; + mhp->attr.page_size = 0; + mhp->attr.len = ~0UL; + mhp->attr.pbl_size = 0; + + ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid, + FW_RI_STAG_NSMR, mhp->attr.perms, + mhp->attr.mw_bind_enable, 0, 0, ~0UL, 0, 0, 0); + if (ret) + goto err1; + + ret = finish_mem_reg(mhp, stag); + if (ret) + goto err2; + return &mhp->ibmr; +err2: + dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); +err1: + kfree(mhp); + return ERR_PTR(ret); +} + +struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int acc, struct ib_udata *udata) +{ + __be64 *pages; + int shift, n, len; + int i, j, k; + int err = 0; + struct ib_umem_chunk *chunk; + struct c4iw_dev *rhp; + struct c4iw_pd *php; + struct c4iw_mr *mhp; + + PDBG("%s ib_pd %p\n", __func__, pd); + + if (length == ~0ULL) + return ERR_PTR(-EINVAL); + + if ((length + start) < start) + return ERR_PTR(-EINVAL); + + php = to_c4iw_pd(pd); + rhp = php->rhp; + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + return ERR_PTR(-ENOMEM); + + mhp->rhp = rhp; + + mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); + if (IS_ERR(mhp->umem)) { + err = PTR_ERR(mhp->umem); + kfree(mhp); + return ERR_PTR(err); + } + + shift = ffs(mhp->umem->page_size) - 1; + + n = 0; + list_for_each_entry(chunk, &mhp->umem->chunk_list, list) + n += chunk->nents; + + err = alloc_pbl(mhp, n); + if (err) + goto err; + + pages = (__be64 *) __get_free_page(GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + goto err_pbl; + } + + i = n = 0; + + list_for_each_entry(chunk, &mhp->umem->chunk_list, list) + for (j = 0; j < chunk->nmap; ++j) { + len = sg_dma_len(&chunk->page_list[j]) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = cpu_to_be64(sg_dma_address( + &chunk->page_list[j]) + + mhp->umem->page_size * k); + if (i == PAGE_SIZE / sizeof *pages) { + err = write_pbl(&mhp->rhp->rdev, + pages, + mhp->attr.pbl_addr + (n << 3), i); + if (err) + goto pbl_done; + n += i; + i = 0; + } + } + } + + if (i) + err = write_pbl(&mhp->rhp->rdev, pages, + mhp->attr.pbl_addr + (n << 3), i); + +pbl_done: + free_page((unsigned long) pages); + if (err) + goto err_pbl; + + mhp->attr.pdid = php->pdid; + mhp->attr.zbva = 0; + mhp->attr.perms = c4iw_ib_to_tpt_access(acc); + mhp->attr.va_fbo = virt; + mhp->attr.page_size = shift - 12; + mhp->attr.len = length; + + err = register_mem(rhp, php, mhp, shift); + if (err) + goto err_pbl; + + return &mhp->ibmr; + +err_pbl: + c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, + mhp->attr.pbl_size << 3); + +err: + ib_umem_release(mhp->umem); + kfree(mhp); + return ERR_PTR(err); +} + +struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd) +{ + struct c4iw_dev *rhp; + struct c4iw_pd *php; + struct c4iw_mw *mhp; + u32 mmid; + u32 stag = 0; + int ret; + + php = to_c4iw_pd(pd); + rhp = php->rhp; + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + return ERR_PTR(-ENOMEM); + ret = allocate_window(&rhp->rdev, &stag, php->pdid); + if (ret) { + kfree(mhp); + return ERR_PTR(ret); + } + mhp->rhp = rhp; + mhp->attr.pdid = php->pdid; + mhp->attr.type = FW_RI_STAG_MW; + mhp->attr.stag = stag; + mmid = (stag) >> 8; + mhp->ibmw.rkey = stag; + if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { + deallocate_window(&rhp->rdev, mhp->attr.stag); + kfree(mhp); + return ERR_PTR(-ENOMEM); + } + PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); + return &(mhp->ibmw); +} + +int c4iw_dealloc_mw(struct ib_mw *mw) +{ + struct c4iw_dev *rhp; + struct c4iw_mw *mhp; + u32 mmid; + + mhp = to_c4iw_mw(mw); + rhp = mhp->rhp; + mmid = (mw->rkey) >> 8; + deallocate_window(&rhp->rdev, mhp->attr.stag); + remove_handle(rhp, &rhp->mmidr, mmid); + kfree(mhp); + PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); + return 0; +} + +struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) +{ + struct c4iw_dev *rhp; + struct c4iw_pd *php; + struct c4iw_mr *mhp; + u32 mmid; + u32 stag = 0; + int ret = 0; + + php = to_c4iw_pd(pd); + rhp = php->rhp; + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) { + ret = -ENOMEM; + goto err; + } + + mhp->rhp = rhp; + ret = alloc_pbl(mhp, pbl_depth); + if (ret) + goto err1; + mhp->attr.pbl_size = pbl_depth; + ret = allocate_stag(&rhp->rdev, &stag, php->pdid, + mhp->attr.pbl_size, mhp->attr.pbl_addr); + if (ret) + goto err2; + mhp->attr.pdid = php->pdid; + mhp->attr.type = FW_RI_STAG_NSMR; + mhp->attr.stag = stag; + mhp->attr.state = 1; + mmid = (stag) >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { + ret = -ENOMEM; + goto err3; + } + + PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); + return &(mhp->ibmr); +err3: + dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); +err2: + c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, + mhp->attr.pbl_size << 3); +err1: + kfree(mhp); +err: + return ERR_PTR(ret); +} + +struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device, + int page_list_len) +{ + struct c4iw_fr_page_list *c4pl; + struct c4iw_dev *dev = to_c4iw_dev(device); + dma_addr_t dma_addr; + int size = sizeof *c4pl + page_list_len * sizeof(u64); + + c4pl = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev, size, + &dma_addr, GFP_KERNEL); + if (!c4pl) + return ERR_PTR(-ENOMEM); + + dma_unmap_addr_set(c4pl, mapping, dma_addr); + c4pl->dma_addr = dma_addr; + c4pl->dev = dev; + c4pl->size = size; + c4pl->ibpl.page_list = (u64 *)(c4pl + 1); + c4pl->ibpl.max_page_list_len = page_list_len; + + return &c4pl->ibpl; +} + +void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl) +{ + struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl); + + dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, c4pl->size, + c4pl, dma_unmap_addr(c4pl, mapping)); +} + +int c4iw_dereg_mr(struct ib_mr *ib_mr) +{ + struct c4iw_dev *rhp; + struct c4iw_mr *mhp; + u32 mmid; + + PDBG("%s ib_mr %p\n", __func__, ib_mr); + /* There can be no memory windows */ + if (atomic_read(&ib_mr->usecnt)) + return -EINVAL; + + mhp = to_c4iw_mr(ib_mr); + rhp = mhp->rhp; + mmid = mhp->attr.stag >> 8; + dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); + if (mhp->attr.pbl_size) + c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, + mhp->attr.pbl_size << 3); + remove_handle(rhp, &rhp->mmidr, mmid); + if (mhp->kva) + kfree((void *) (unsigned long) mhp->kva); + if (mhp->umem) + ib_umem_release(mhp->umem); + PDBG("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp); + kfree(mhp); + return 0; +} diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c new file mode 100644 index 00000000..be1c18f4 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -0,0 +1,530 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include "iw_cxgb4.h" + +static int fastreg_support = 1; +module_param(fastreg_support, int, 0644); +MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); + +static struct ib_ah *c4iw_ah_create(struct ib_pd *pd, + struct ib_ah_attr *ah_attr) +{ + return ERR_PTR(-ENOSYS); +} + +static int c4iw_ah_destroy(struct ib_ah *ah) +{ + return -ENOSYS; +} + +static int c4iw_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + return -ENOSYS; +} + +static int c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + return -ENOSYS; +} + +static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags, + u8 port_num, struct ib_wc *in_wc, + struct ib_grh *in_grh, struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + return -ENOSYS; +} + +static int c4iw_dealloc_ucontext(struct ib_ucontext *context) +{ + struct c4iw_dev *rhp = to_c4iw_dev(context->device); + struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context); + struct c4iw_mm_entry *mm, *tmp; + + PDBG("%s context %p\n", __func__, context); + list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) + kfree(mm); + c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx); + kfree(ucontext); + return 0; +} + +static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct c4iw_ucontext *context; + struct c4iw_dev *rhp = to_c4iw_dev(ibdev); + + PDBG("%s ibdev %p\n", __func__, ibdev); + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx); + INIT_LIST_HEAD(&context->mmaps); + spin_lock_init(&context->mmap_lock); + return &context->ibucontext; +} + +static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + int len = vma->vm_end - vma->vm_start; + u32 key = vma->vm_pgoff << PAGE_SHIFT; + struct c4iw_rdev *rdev; + int ret = 0; + struct c4iw_mm_entry *mm; + struct c4iw_ucontext *ucontext; + u64 addr; + + PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff, + key, len); + + if (vma->vm_start & (PAGE_SIZE-1)) + return -EINVAL; + + rdev = &(to_c4iw_dev(context->device)->rdev); + ucontext = to_c4iw_ucontext(context); + + mm = remove_mmap(ucontext, key, len); + if (!mm) + return -EINVAL; + addr = mm->addr; + kfree(mm); + + if ((addr >= pci_resource_start(rdev->lldi.pdev, 0)) && + (addr < (pci_resource_start(rdev->lldi.pdev, 0) + + pci_resource_len(rdev->lldi.pdev, 0)))) { + + /* + * MA_SYNC register... + */ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ret = io_remap_pfn_range(vma, vma->vm_start, + addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + } else if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) && + (addr < (pci_resource_start(rdev->lldi.pdev, 2) + + pci_resource_len(rdev->lldi.pdev, 2)))) { + + /* + * Map user DB or OCQP memory... + */ + if (addr >= rdev->oc_mw_pa) + vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); + else + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ret = io_remap_pfn_range(vma, vma->vm_start, + addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + } else { + + /* + * Map WQ or CQ contig dma memory... + */ + ret = remap_pfn_range(vma, vma->vm_start, + addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + } + + return ret; +} + +static int c4iw_deallocate_pd(struct ib_pd *pd) +{ + struct c4iw_dev *rhp; + struct c4iw_pd *php; + + php = to_c4iw_pd(pd); + rhp = php->rhp; + PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid); + c4iw_put_resource(&rhp->rdev.resource.pdid_fifo, php->pdid, + &rhp->rdev.resource.pdid_fifo_lock); + kfree(php); + return 0; +} + +static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct c4iw_pd *php; + u32 pdid; + struct c4iw_dev *rhp; + + PDBG("%s ibdev %p\n", __func__, ibdev); + rhp = (struct c4iw_dev *) ibdev; + pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_fifo, + &rhp->rdev.resource.pdid_fifo_lock); + if (!pdid) + return ERR_PTR(-EINVAL); + php = kzalloc(sizeof(*php), GFP_KERNEL); + if (!php) { + c4iw_put_resource(&rhp->rdev.resource.pdid_fifo, pdid, + &rhp->rdev.resource.pdid_fifo_lock); + return ERR_PTR(-ENOMEM); + } + php->pdid = pdid; + php->rhp = rhp; + if (context) { + if (ib_copy_to_udata(udata, &php->pdid, sizeof(u32))) { + c4iw_deallocate_pd(&php->ibpd); + return ERR_PTR(-EFAULT); + } + } + PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php); + return &php->ibpd; +} + +static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + PDBG("%s ibdev %p\n", __func__, ibdev); + *pkey = 0; + return 0; +} + +static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct c4iw_dev *dev; + + PDBG("%s ibdev %p, port %d, index %d, gid %p\n", + __func__, ibdev, port, index, gid); + dev = to_c4iw_dev(ibdev); + BUG_ON(port == 0); + memset(&(gid->raw[0]), 0, sizeof(gid->raw)); + memcpy(&(gid->raw[0]), dev->rdev.lldi.ports[port-1]->dev_addr, 6); + return 0; +} + +static int c4iw_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + + struct c4iw_dev *dev; + PDBG("%s ibdev %p\n", __func__, ibdev); + + dev = to_c4iw_dev(ibdev); + memset(props, 0, sizeof *props); + memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); + props->hw_ver = dev->rdev.lldi.adapter_type; + props->fw_ver = dev->rdev.lldi.fw_vers; + props->device_cap_flags = dev->device_cap_flags; + props->page_size_cap = T4_PAGESIZE_MASK; + props->vendor_id = (u32)dev->rdev.lldi.pdev->vendor; + props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device; + props->max_mr_size = T4_MAX_MR_SIZE; + props->max_qp = T4_MAX_NUM_QP; + props->max_qp_wr = T4_MAX_QP_DEPTH; + props->max_sge = T4_MAX_RECV_SGE; + props->max_sge_rd = 1; + props->max_qp_rd_atom = c4iw_max_read_depth; + props->max_qp_init_rd_atom = c4iw_max_read_depth; + props->max_cq = T4_MAX_NUM_CQ; + props->max_cqe = T4_MAX_CQ_DEPTH; + props->max_mr = c4iw_num_stags(&dev->rdev); + props->max_pd = T4_MAX_NUM_PD; + props->local_ca_ack_delay = 0; + props->max_fast_reg_page_list_len = T4_MAX_FR_DEPTH; + + return 0; +} + +static int c4iw_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct c4iw_dev *dev; + struct net_device *netdev; + struct in_device *inetdev; + + PDBG("%s ibdev %p\n", __func__, ibdev); + + dev = to_c4iw_dev(ibdev); + netdev = dev->rdev.lldi.ports[port-1]; + + memset(props, 0, sizeof(struct ib_port_attr)); + props->max_mtu = IB_MTU_4096; + if (netdev->mtu >= 4096) + props->active_mtu = IB_MTU_4096; + else if (netdev->mtu >= 2048) + props->active_mtu = IB_MTU_2048; + else if (netdev->mtu >= 1024) + props->active_mtu = IB_MTU_1024; + else if (netdev->mtu >= 512) + props->active_mtu = IB_MTU_512; + else + props->active_mtu = IB_MTU_256; + + if (!netif_carrier_ok(netdev)) + props->state = IB_PORT_DOWN; + else { + inetdev = in_dev_get(netdev); + if (inetdev) { + if (inetdev->ifa_list) + props->state = IB_PORT_ACTIVE; + else + props->state = IB_PORT_INIT; + in_dev_put(inetdev); + } else + props->state = IB_PORT_INIT; + } + + props->port_cap_flags = + IB_PORT_CM_SUP | + IB_PORT_SNMP_TUNNEL_SUP | + IB_PORT_REINIT_SUP | + IB_PORT_DEVICE_MGMT_SUP | + IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; + props->gid_tbl_len = 1; + props->pkey_tbl_len = 1; + props->active_width = 2; + props->active_speed = IB_SPEED_DDR; + props->max_msg_sz = -1; + + return 0; +} + +static ssize_t show_rev(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, + ibdev.dev); + PDBG("%s dev 0x%p\n", __func__, dev); + return sprintf(buf, "%d\n", c4iw_dev->rdev.lldi.adapter_type); +} + +static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, + ibdev.dev); + PDBG("%s dev 0x%p\n", __func__, dev); + + return sprintf(buf, "%u.%u.%u.%u\n", + FW_HDR_FW_VER_MAJOR_GET(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_MINOR_GET(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_MICRO_GET(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_BUILD_GET(c4iw_dev->rdev.lldi.fw_vers)); +} + +static ssize_t show_hca(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, + ibdev.dev); + struct ethtool_drvinfo info; + struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0]; + + PDBG("%s dev 0x%p\n", __func__, dev); + lldev->ethtool_ops->get_drvinfo(lldev, &info); + return sprintf(buf, "%s\n", info.driver); +} + +static ssize_t show_board(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, + ibdev.dev); + PDBG("%s dev 0x%p\n", __func__, dev); + return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, + c4iw_dev->rdev.lldi.pdev->device); +} + +static int c4iw_get_mib(struct ib_device *ibdev, + union rdma_protocol_stats *stats) +{ + struct tp_tcp_stats v4, v6; + struct c4iw_dev *c4iw_dev = to_c4iw_dev(ibdev); + + cxgb4_get_tcp_stats(c4iw_dev->rdev.lldi.pdev, &v4, &v6); + memset(stats, 0, sizeof *stats); + stats->iw.tcpInSegs = v4.tcpInSegs + v6.tcpInSegs; + stats->iw.tcpOutSegs = v4.tcpOutSegs + v6.tcpOutSegs; + stats->iw.tcpRetransSegs = v4.tcpRetransSegs + v6.tcpRetransSegs; + stats->iw.tcpOutRsts = v4.tcpOutRsts + v6.tcpOutSegs; + + return 0; +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); + +static struct device_attribute *c4iw_class_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_fw_ver, + &dev_attr_hca_type, + &dev_attr_board_id, +}; + +int c4iw_register_device(struct c4iw_dev *dev) +{ + int ret; + int i; + + PDBG("%s c4iw_dev %p\n", __func__, dev); + BUG_ON(!dev->rdev.lldi.ports[0]); + strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX); + memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); + memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); + dev->ibdev.owner = THIS_MODULE; + dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; + if (fastreg_support) + dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + dev->ibdev.local_dma_lkey = 0; + dev->ibdev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV); + dev->ibdev.node_type = RDMA_NODE_RNIC; + memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); + dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports; + dev->ibdev.num_comp_vectors = 1; + dev->ibdev.dma_device = &(dev->rdev.lldi.pdev->dev); + dev->ibdev.query_device = c4iw_query_device; + dev->ibdev.query_port = c4iw_query_port; + dev->ibdev.query_pkey = c4iw_query_pkey; + dev->ibdev.query_gid = c4iw_query_gid; + dev->ibdev.alloc_ucontext = c4iw_alloc_ucontext; + dev->ibdev.dealloc_ucontext = c4iw_dealloc_ucontext; + dev->ibdev.mmap = c4iw_mmap; + dev->ibdev.alloc_pd = c4iw_allocate_pd; + dev->ibdev.dealloc_pd = c4iw_deallocate_pd; + dev->ibdev.create_ah = c4iw_ah_create; + dev->ibdev.destroy_ah = c4iw_ah_destroy; + dev->ibdev.create_qp = c4iw_create_qp; + dev->ibdev.modify_qp = c4iw_ib_modify_qp; + dev->ibdev.destroy_qp = c4iw_destroy_qp; + dev->ibdev.create_cq = c4iw_create_cq; + dev->ibdev.destroy_cq = c4iw_destroy_cq; + dev->ibdev.resize_cq = c4iw_resize_cq; + dev->ibdev.poll_cq = c4iw_poll_cq; + dev->ibdev.get_dma_mr = c4iw_get_dma_mr; + dev->ibdev.reg_phys_mr = c4iw_register_phys_mem; + dev->ibdev.rereg_phys_mr = c4iw_reregister_phys_mem; + dev->ibdev.reg_user_mr = c4iw_reg_user_mr; + dev->ibdev.dereg_mr = c4iw_dereg_mr; + dev->ibdev.alloc_mw = c4iw_alloc_mw; + dev->ibdev.bind_mw = c4iw_bind_mw; + dev->ibdev.dealloc_mw = c4iw_dealloc_mw; + dev->ibdev.alloc_fast_reg_mr = c4iw_alloc_fast_reg_mr; + dev->ibdev.alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl; + dev->ibdev.free_fast_reg_page_list = c4iw_free_fastreg_pbl; + dev->ibdev.attach_mcast = c4iw_multicast_attach; + dev->ibdev.detach_mcast = c4iw_multicast_detach; + dev->ibdev.process_mad = c4iw_process_mad; + dev->ibdev.req_notify_cq = c4iw_arm_cq; + dev->ibdev.post_send = c4iw_post_send; + dev->ibdev.post_recv = c4iw_post_receive; + dev->ibdev.get_protocol_stats = c4iw_get_mib; + dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; + + dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); + if (!dev->ibdev.iwcm) + return -ENOMEM; + + dev->ibdev.iwcm->connect = c4iw_connect; + dev->ibdev.iwcm->accept = c4iw_accept_cr; + dev->ibdev.iwcm->reject = c4iw_reject_cr; + dev->ibdev.iwcm->create_listen = c4iw_create_listen; + dev->ibdev.iwcm->destroy_listen = c4iw_destroy_listen; + dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref; + dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref; + dev->ibdev.iwcm->get_qp = c4iw_get_qp; + + ret = ib_register_device(&dev->ibdev, NULL); + if (ret) + goto bail1; + + for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) { + ret = device_create_file(&dev->ibdev.dev, + c4iw_class_attributes[i]); + if (ret) + goto bail2; + } + return 0; +bail2: + ib_unregister_device(&dev->ibdev); +bail1: + kfree(dev->ibdev.iwcm); + return ret; +} + +void c4iw_unregister_device(struct c4iw_dev *dev) +{ + int i; + + PDBG("%s c4iw_dev %p\n", __func__, dev); + for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) + device_remove_file(&dev->ibdev.dev, + c4iw_class_attributes[i]); + ib_unregister_device(&dev->ibdev); + kfree(dev->ibdev.iwcm); + return; +} diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c new file mode 100644 index 00000000..5f940aea --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -0,0 +1,1623 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "iw_cxgb4.h" + +static int ocqp_support = 1; +module_param(ocqp_support, int, 0644); +MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)"); + +static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) +{ + unsigned long flag; + spin_lock_irqsave(&qhp->lock, flag); + qhp->attr.state = state; + spin_unlock_irqrestore(&qhp->lock, flag); +} + +static void dealloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + c4iw_ocqp_pool_free(rdev, sq->dma_addr, sq->memsize); +} + +static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + dma_free_coherent(&(rdev->lldi.pdev->dev), sq->memsize, sq->queue, + pci_unmap_addr(sq, mapping)); +} + +static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + if (t4_sq_onchip(sq)) + dealloc_oc_sq(rdev, sq); + else + dealloc_host_sq(rdev, sq); +} + +static int alloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + if (!ocqp_support || !t4_ocqp_supported()) + return -ENOSYS; + sq->dma_addr = c4iw_ocqp_pool_alloc(rdev, sq->memsize); + if (!sq->dma_addr) + return -ENOMEM; + sq->phys_addr = rdev->oc_mw_pa + sq->dma_addr - + rdev->lldi.vr->ocq.start; + sq->queue = (__force union t4_wr *)(rdev->oc_mw_kva + sq->dma_addr - + rdev->lldi.vr->ocq.start); + sq->flags |= T4_SQ_ONCHIP; + return 0; +} + +static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + sq->queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), sq->memsize, + &(sq->dma_addr), GFP_KERNEL); + if (!sq->queue) + return -ENOMEM; + sq->phys_addr = virt_to_phys(sq->queue); + pci_unmap_addr_set(sq, mapping, sq->dma_addr); + return 0; +} + +static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, + struct c4iw_dev_ucontext *uctx) +{ + /* + * uP clears EQ contexts when the connection exits rdma mode, + * so no need to post a RESET WR for these EQs. + */ + dma_free_coherent(&(rdev->lldi.pdev->dev), + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); + dealloc_sq(rdev, &wq->sq); + c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); + kfree(wq->rq.sw_rq); + kfree(wq->sq.sw_sq); + c4iw_put_qpid(rdev, wq->rq.qid, uctx); + c4iw_put_qpid(rdev, wq->sq.qid, uctx); + return 0; +} + +static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, + struct t4_cq *rcq, struct t4_cq *scq, + struct c4iw_dev_ucontext *uctx) +{ + int user = (uctx != &rdev->uctx); + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + int wr_len; + struct c4iw_wr_wait wr_wait; + struct sk_buff *skb; + int ret; + int eqsize; + + wq->sq.qid = c4iw_get_qpid(rdev, uctx); + if (!wq->sq.qid) + return -ENOMEM; + + wq->rq.qid = c4iw_get_qpid(rdev, uctx); + if (!wq->rq.qid) + goto err1; + + if (!user) { + wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq, + GFP_KERNEL); + if (!wq->sq.sw_sq) + goto err2; + + wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq, + GFP_KERNEL); + if (!wq->rq.sw_rq) + goto err3; + } + + /* + * RQT must be a power of 2. + */ + wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size); + wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); + if (!wq->rq.rqt_hwaddr) + goto err4; + + if (user) { + if (alloc_oc_sq(rdev, &wq->sq) && alloc_host_sq(rdev, &wq->sq)) + goto err5; + } else + if (alloc_host_sq(rdev, &wq->sq)) + goto err5; + memset(wq->sq.queue, 0, wq->sq.memsize); + dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); + + wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), + wq->rq.memsize, &(wq->rq.dma_addr), + GFP_KERNEL); + if (!wq->rq.queue) + goto err6; + PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", + __func__, wq->sq.queue, + (unsigned long long)virt_to_phys(wq->sq.queue), + wq->rq.queue, + (unsigned long long)virt_to_phys(wq->rq.queue)); + memset(wq->rq.queue, 0, wq->rq.memsize); + dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); + + wq->db = rdev->lldi.db_reg; + wq->gts = rdev->lldi.gts_reg; + if (user) { + wq->sq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) + + (wq->sq.qid << rdev->qpshift); + wq->sq.udb &= PAGE_MASK; + wq->rq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) + + (wq->rq.qid << rdev->qpshift); + wq->rq.udb &= PAGE_MASK; + } + wq->rdev = rdev; + wq->rq.msn = 1; + + /* build fw_ri_res_wr */ + wr_len = sizeof *res_wr + 2 * sizeof *res; + + skb = alloc_skb(wr_len, GFP_KERNEL); + if (!skb) { + ret = -ENOMEM; + goto err7; + } + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, wr_len); + res_wr->op_nres = cpu_to_be32( + FW_WR_OP(FW_RI_RES_WR) | + V_FW_RI_RES_WR_NRES(2) | + FW_WR_COMPL(1)); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (unsigned long) &wr_wait; + res = res_wr->res; + res->u.sqrq.restype = FW_RI_RES_TYPE_SQ; + res->u.sqrq.op = FW_RI_RES_OP_WRITE; + + /* + * eqsize is the number of 64B entries plus the status page size. + */ + eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + T4_EQ_STATUS_ENTRIES; + + res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( + V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ + V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ + V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ + (t4_sq_onchip(&wq->sq) ? F_FW_RI_RES_WR_ONCHIP : 0) | + V_FW_RI_RES_WR_IQID(scq->cqid)); + res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( + V_FW_RI_RES_WR_DCAEN(0) | + V_FW_RI_RES_WR_DCACPU(0) | + V_FW_RI_RES_WR_FBMIN(2) | + V_FW_RI_RES_WR_FBMAX(2) | + V_FW_RI_RES_WR_CIDXFTHRESHO(0) | + V_FW_RI_RES_WR_CIDXFTHRESH(0) | + V_FW_RI_RES_WR_EQSIZE(eqsize)); + res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid); + res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr); + res++; + res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; + res->u.sqrq.op = FW_RI_RES_OP_WRITE; + + /* + * eqsize is the number of 64B entries plus the status page size. + */ + eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + T4_EQ_STATUS_ENTRIES; + res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( + V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ + V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ + V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ + V_FW_RI_RES_WR_IQID(rcq->cqid)); + res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( + V_FW_RI_RES_WR_DCAEN(0) | + V_FW_RI_RES_WR_DCACPU(0) | + V_FW_RI_RES_WR_FBMIN(2) | + V_FW_RI_RES_WR_FBMAX(2) | + V_FW_RI_RES_WR_CIDXFTHRESHO(0) | + V_FW_RI_RES_WR_CIDXFTHRESH(0) | + V_FW_RI_RES_WR_EQSIZE(eqsize)); + res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); + res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); + + c4iw_init_wr_wait(&wr_wait); + + ret = c4iw_ofld_send(rdev, skb); + if (ret) + goto err7; + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__); + if (ret) + goto err7; + + PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n", + __func__, wq->sq.qid, wq->rq.qid, wq->db, + (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb); + + return 0; +err7: + dma_free_coherent(&(rdev->lldi.pdev->dev), + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); +err6: + dealloc_sq(rdev, &wq->sq); +err5: + c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); +err4: + kfree(wq->rq.sw_rq); +err3: + kfree(wq->sq.sw_sq); +err2: + c4iw_put_qpid(rdev, wq->rq.qid, uctx); +err1: + c4iw_put_qpid(rdev, wq->sq.qid, uctx); + return -ENOMEM; +} + +static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, + struct ib_send_wr *wr, int max, u32 *plenp) +{ + u8 *dstp, *srcp; + u32 plen = 0; + int i; + int rem, len; + + dstp = (u8 *)immdp->data; + for (i = 0; i < wr->num_sge; i++) { + if ((plen + wr->sg_list[i].length) > max) + return -EMSGSIZE; + srcp = (u8 *)(unsigned long)wr->sg_list[i].addr; + plen += wr->sg_list[i].length; + rem = wr->sg_list[i].length; + while (rem) { + if (dstp == (u8 *)&sq->queue[sq->size]) + dstp = (u8 *)sq->queue; + if (rem <= (u8 *)&sq->queue[sq->size] - dstp) + len = rem; + else + len = (u8 *)&sq->queue[sq->size] - dstp; + memcpy(dstp, srcp, len); + dstp += len; + srcp += len; + rem -= len; + } + } + len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp); + if (len) + memset(dstp, 0, len); + immdp->op = FW_RI_DATA_IMMD; + immdp->r1 = 0; + immdp->r2 = 0; + immdp->immdlen = cpu_to_be32(plen); + *plenp = plen; + return 0; +} + +static int build_isgl(__be64 *queue_start, __be64 *queue_end, + struct fw_ri_isgl *isglp, struct ib_sge *sg_list, + int num_sge, u32 *plenp) + +{ + int i; + u32 plen = 0; + __be64 *flitp = (__be64 *)isglp->sge; + + for (i = 0; i < num_sge; i++) { + if ((plen + sg_list[i].length) < plen) + return -EMSGSIZE; + plen += sg_list[i].length; + *flitp = cpu_to_be64(((u64)sg_list[i].lkey << 32) | + sg_list[i].length); + if (++flitp == queue_end) + flitp = queue_start; + *flitp = cpu_to_be64(sg_list[i].addr); + if (++flitp == queue_end) + flitp = queue_start; + } + *flitp = (__force __be64)0; + isglp->op = FW_RI_DATA_ISGL; + isglp->r1 = 0; + isglp->nsge = cpu_to_be16(num_sge); + isglp->r2 = 0; + if (plenp) + *plenp = plen; + return 0; +} + +static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, + struct ib_send_wr *wr, u8 *len16) +{ + u32 plen; + int size; + int ret; + + if (wr->num_sge > T4_MAX_SEND_SGE) + return -EINVAL; + switch (wr->opcode) { + case IB_WR_SEND: + if (wr->send_flags & IB_SEND_SOLICITED) + wqe->send.sendop_pkd = cpu_to_be32( + V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_SE)); + else + wqe->send.sendop_pkd = cpu_to_be32( + V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND)); + wqe->send.stag_inv = 0; + break; + case IB_WR_SEND_WITH_INV: + if (wr->send_flags & IB_SEND_SOLICITED) + wqe->send.sendop_pkd = cpu_to_be32( + V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_SE_INV)); + else + wqe->send.sendop_pkd = cpu_to_be32( + V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_INV)); + wqe->send.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); + break; + + default: + return -EINVAL; + } + + plen = 0; + if (wr->num_sge) { + if (wr->send_flags & IB_SEND_INLINE) { + ret = build_immd(sq, wqe->send.u.immd_src, wr, + T4_MAX_SEND_INLINE, &plen); + if (ret) + return ret; + size = sizeof wqe->send + sizeof(struct fw_ri_immd) + + plen; + } else { + ret = build_isgl((__be64 *)sq->queue, + (__be64 *)&sq->queue[sq->size], + wqe->send.u.isgl_src, + wr->sg_list, wr->num_sge, &plen); + if (ret) + return ret; + size = sizeof wqe->send + sizeof(struct fw_ri_isgl) + + wr->num_sge * sizeof(struct fw_ri_sge); + } + } else { + wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD; + wqe->send.u.immd_src[0].r1 = 0; + wqe->send.u.immd_src[0].r2 = 0; + wqe->send.u.immd_src[0].immdlen = 0; + size = sizeof wqe->send + sizeof(struct fw_ri_immd); + plen = 0; + } + *len16 = DIV_ROUND_UP(size, 16); + wqe->send.plen = cpu_to_be32(plen); + return 0; +} + +static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, + struct ib_send_wr *wr, u8 *len16) +{ + u32 plen; + int size; + int ret; + + if (wr->num_sge > T4_MAX_SEND_SGE) + return -EINVAL; + wqe->write.r2 = 0; + wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); + wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); + if (wr->num_sge) { + if (wr->send_flags & IB_SEND_INLINE) { + ret = build_immd(sq, wqe->write.u.immd_src, wr, + T4_MAX_WRITE_INLINE, &plen); + if (ret) + return ret; + size = sizeof wqe->write + sizeof(struct fw_ri_immd) + + plen; + } else { + ret = build_isgl((__be64 *)sq->queue, + (__be64 *)&sq->queue[sq->size], + wqe->write.u.isgl_src, + wr->sg_list, wr->num_sge, &plen); + if (ret) + return ret; + size = sizeof wqe->write + sizeof(struct fw_ri_isgl) + + wr->num_sge * sizeof(struct fw_ri_sge); + } + } else { + wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD; + wqe->write.u.immd_src[0].r1 = 0; + wqe->write.u.immd_src[0].r2 = 0; + wqe->write.u.immd_src[0].immdlen = 0; + size = sizeof wqe->write + sizeof(struct fw_ri_immd); + plen = 0; + } + *len16 = DIV_ROUND_UP(size, 16); + wqe->write.plen = cpu_to_be32(plen); + return 0; +} + +static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) +{ + if (wr->num_sge > 1) + return -EINVAL; + if (wr->num_sge) { + wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey); + wqe->read.to_src_hi = cpu_to_be32((u32)(wr->wr.rdma.remote_addr + >> 32)); + wqe->read.to_src_lo = cpu_to_be32((u32)wr->wr.rdma.remote_addr); + wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey); + wqe->read.plen = cpu_to_be32(wr->sg_list[0].length); + wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr + >> 32)); + wqe->read.to_sink_lo = cpu_to_be32((u32)(wr->sg_list[0].addr)); + } else { + wqe->read.stag_src = cpu_to_be32(2); + wqe->read.to_src_hi = 0; + wqe->read.to_src_lo = 0; + wqe->read.stag_sink = cpu_to_be32(2); + wqe->read.plen = 0; + wqe->read.to_sink_hi = 0; + wqe->read.to_sink_lo = 0; + } + wqe->read.r2 = 0; + wqe->read.r5 = 0; + *len16 = DIV_ROUND_UP(sizeof wqe->read, 16); + return 0; +} + +static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, + struct ib_recv_wr *wr, u8 *len16) +{ + int ret; + + ret = build_isgl((__be64 *)qhp->wq.rq.queue, + (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size], + &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); + if (ret) + return ret; + *len16 = DIV_ROUND_UP(sizeof wqe->recv + + wr->num_sge * sizeof(struct fw_ri_sge), 16); + return 0; +} + +static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, + struct ib_send_wr *wr, u8 *len16) +{ + + struct fw_ri_immd *imdp; + __be64 *p; + int i; + int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); + int rem; + + if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH) + return -EINVAL; + + wqe->fr.qpbinde_to_dcacpu = 0; + wqe->fr.pgsz_shift = wr->wr.fast_reg.page_shift - 12; + wqe->fr.addr_type = FW_RI_VA_BASED_TO; + wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->wr.fast_reg.access_flags); + wqe->fr.len_hi = 0; + wqe->fr.len_lo = cpu_to_be32(wr->wr.fast_reg.length); + wqe->fr.stag = cpu_to_be32(wr->wr.fast_reg.rkey); + wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); + wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start & + 0xffffffff); + WARN_ON(pbllen > T4_MAX_FR_IMMD); + imdp = (struct fw_ri_immd *)(&wqe->fr + 1); + imdp->op = FW_RI_DATA_IMMD; + imdp->r1 = 0; + imdp->r2 = 0; + imdp->immdlen = cpu_to_be32(pbllen); + p = (__be64 *)(imdp + 1); + rem = pbllen; + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); + rem -= sizeof *p; + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + BUG_ON(rem < 0); + while (rem) { + *p = 0; + rem -= sizeof *p; + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + *len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *imdp + pbllen, 16); + return 0; +} + +static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, + u8 *len16) +{ + wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); + wqe->inv.r2 = 0; + *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16); + return 0; +} + +void c4iw_qp_add_ref(struct ib_qp *qp) +{ + PDBG("%s ib_qp %p\n", __func__, qp); + atomic_inc(&(to_c4iw_qp(qp)->refcnt)); +} + +void c4iw_qp_rem_ref(struct ib_qp *qp) +{ + PDBG("%s ib_qp %p\n", __func__, qp); + if (atomic_dec_and_test(&(to_c4iw_qp(qp)->refcnt))) + wake_up(&(to_c4iw_qp(qp)->wait)); +} + +int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + int err = 0; + u8 len16 = 0; + enum fw_wr_opcodes fw_opcode = 0; + enum fw_ri_wr_flags fw_flags; + struct c4iw_qp *qhp; + union t4_wr *wqe; + u32 num_wrs; + struct t4_swsqe *swsqe; + unsigned long flag; + u16 idx = 0; + + qhp = to_c4iw_qp(ibqp); + spin_lock_irqsave(&qhp->lock, flag); + if (t4_wq_in_error(&qhp->wq)) { + spin_unlock_irqrestore(&qhp->lock, flag); + return -EINVAL; + } + num_wrs = t4_sq_avail(&qhp->wq); + if (num_wrs == 0) { + spin_unlock_irqrestore(&qhp->lock, flag); + return -ENOMEM; + } + while (wr) { + if (num_wrs == 0) { + err = -ENOMEM; + *bad_wr = wr; + break; + } + wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue + + qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE); + + fw_flags = 0; + if (wr->send_flags & IB_SEND_SOLICITED) + fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; + if (wr->send_flags & IB_SEND_SIGNALED) + fw_flags |= FW_RI_COMPLETION_FLAG; + swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; + switch (wr->opcode) { + case IB_WR_SEND_WITH_INV: + case IB_WR_SEND: + if (wr->send_flags & IB_SEND_FENCE) + fw_flags |= FW_RI_READ_FENCE_FLAG; + fw_opcode = FW_RI_SEND_WR; + if (wr->opcode == IB_WR_SEND) + swsqe->opcode = FW_RI_SEND; + else + swsqe->opcode = FW_RI_SEND_WITH_INV; + err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); + break; + case IB_WR_RDMA_WRITE: + fw_opcode = FW_RI_RDMA_WRITE_WR; + swsqe->opcode = FW_RI_RDMA_WRITE; + err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16); + break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: + fw_opcode = FW_RI_RDMA_READ_WR; + swsqe->opcode = FW_RI_READ_REQ; + if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) + fw_flags = FW_RI_RDMA_READ_INVALIDATE; + else + fw_flags = 0; + err = build_rdma_read(wqe, wr, &len16); + if (err) + break; + swsqe->read_len = wr->sg_list[0].length; + if (!qhp->wq.sq.oldest_read) + qhp->wq.sq.oldest_read = swsqe; + break; + case IB_WR_FAST_REG_MR: + fw_opcode = FW_RI_FR_NSMR_WR; + swsqe->opcode = FW_RI_FAST_REGISTER; + err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16); + break; + case IB_WR_LOCAL_INV: + if (wr->send_flags & IB_SEND_FENCE) + fw_flags |= FW_RI_LOCAL_FENCE_FLAG; + fw_opcode = FW_RI_INV_LSTAG_WR; + swsqe->opcode = FW_RI_LOCAL_INV; + err = build_inv_stag(wqe, wr, &len16); + break; + default: + PDBG("%s post of type=%d TBD!\n", __func__, + wr->opcode); + err = -EINVAL; + } + if (err) { + *bad_wr = wr; + break; + } + swsqe->idx = qhp->wq.sq.pidx; + swsqe->complete = 0; + swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED); + swsqe->wr_id = wr->wr_id; + + init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16); + + PDBG("%s cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n", + __func__, (unsigned long long)wr->wr_id, qhp->wq.sq.pidx, + swsqe->opcode, swsqe->read_len); + wr = wr->next; + num_wrs--; + t4_sq_produce(&qhp->wq, len16); + idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + } + if (t4_wq_db_enabled(&qhp->wq)) + t4_ring_sq_db(&qhp->wq, idx); + spin_unlock_irqrestore(&qhp->lock, flag); + return err; +} + +int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + int err = 0; + struct c4iw_qp *qhp; + union t4_recv_wr *wqe; + u32 num_wrs; + u8 len16 = 0; + unsigned long flag; + u16 idx = 0; + + qhp = to_c4iw_qp(ibqp); + spin_lock_irqsave(&qhp->lock, flag); + if (t4_wq_in_error(&qhp->wq)) { + spin_unlock_irqrestore(&qhp->lock, flag); + return -EINVAL; + } + num_wrs = t4_rq_avail(&qhp->wq); + if (num_wrs == 0) { + spin_unlock_irqrestore(&qhp->lock, flag); + return -ENOMEM; + } + while (wr) { + if (wr->num_sge > T4_MAX_RECV_SGE) { + err = -EINVAL; + *bad_wr = wr; + break; + } + wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue + + qhp->wq.rq.wq_pidx * + T4_EQ_ENTRY_SIZE); + if (num_wrs) + err = build_rdma_recv(qhp, wqe, wr, &len16); + else + err = -ENOMEM; + if (err) { + *bad_wr = wr; + break; + } + + qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id; + + wqe->recv.opcode = FW_RI_RECV_WR; + wqe->recv.r1 = 0; + wqe->recv.wrid = qhp->wq.rq.pidx; + wqe->recv.r2[0] = 0; + wqe->recv.r2[1] = 0; + wqe->recv.r2[2] = 0; + wqe->recv.len16 = len16; + PDBG("%s cookie 0x%llx pidx %u\n", __func__, + (unsigned long long) wr->wr_id, qhp->wq.rq.pidx); + t4_rq_produce(&qhp->wq, len16); + idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + wr = wr->next; + num_wrs--; + } + if (t4_wq_db_enabled(&qhp->wq)) + t4_ring_rq_db(&qhp->wq, idx); + spin_unlock_irqrestore(&qhp->lock, flag); + return err; +} + +int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind) +{ + return -ENOSYS; +} + +static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type, + u8 *ecode) +{ + int status; + int tagged; + int opcode; + int rqtype; + int send_inv; + + if (!err_cqe) { + *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; + *ecode = 0; + return; + } + + status = CQE_STATUS(err_cqe); + opcode = CQE_OPCODE(err_cqe); + rqtype = RQ_TYPE(err_cqe); + send_inv = (opcode == FW_RI_SEND_WITH_INV) || + (opcode == FW_RI_SEND_WITH_SE_INV); + tagged = (opcode == FW_RI_RDMA_WRITE) || + (rqtype && (opcode == FW_RI_READ_RESP)); + + switch (status) { + case T4_ERR_STAG: + if (send_inv) { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_CANT_INV_STAG; + } else { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_INV_STAG; + } + break; + case T4_ERR_PDID: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + if ((opcode == FW_RI_SEND_WITH_INV) || + (opcode == FW_RI_SEND_WITH_SE_INV)) + *ecode = RDMAP_CANT_INV_STAG; + else + *ecode = RDMAP_STAG_NOT_ASSOC; + break; + case T4_ERR_QPID: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_STAG_NOT_ASSOC; + break; + case T4_ERR_ACCESS: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_ACC_VIOL; + break; + case T4_ERR_WRAP: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_TO_WRAP; + break; + case T4_ERR_BOUND: + if (tagged) { + *layer_type = LAYER_DDP|DDP_TAGGED_ERR; + *ecode = DDPT_BASE_BOUNDS; + } else { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_BASE_BOUNDS; + } + break; + case T4_ERR_INVALIDATE_SHARED_MR: + case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_CANT_INV_STAG; + break; + case T4_ERR_ECC: + case T4_ERR_ECC_PSTAG: + case T4_ERR_INTERNAL_ERR: + *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA; + *ecode = 0; + break; + case T4_ERR_OUT_OF_RQE: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_MSN_NOBUF; + break; + case T4_ERR_PBL_ADDR_BOUND: + *layer_type = LAYER_DDP|DDP_TAGGED_ERR; + *ecode = DDPT_BASE_BOUNDS; + break; + case T4_ERR_CRC: + *layer_type = LAYER_MPA|DDP_LLP; + *ecode = MPA_CRC_ERR; + break; + case T4_ERR_MARKER: + *layer_type = LAYER_MPA|DDP_LLP; + *ecode = MPA_MARKER_ERR; + break; + case T4_ERR_PDU_LEN_ERR: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_MSG_TOOBIG; + break; + case T4_ERR_DDP_VERSION: + if (tagged) { + *layer_type = LAYER_DDP|DDP_TAGGED_ERR; + *ecode = DDPT_INV_VERS; + } else { + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_VERS; + } + break; + case T4_ERR_RDMA_VERSION: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_INV_VERS; + break; + case T4_ERR_OPCODE: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_INV_OPCODE; + break; + case T4_ERR_DDP_QUEUE_NUM: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_QN; + break; + case T4_ERR_MSN: + case T4_ERR_MSN_GAP: + case T4_ERR_MSN_RANGE: + case T4_ERR_IRD_OVERFLOW: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_MSN_RANGE; + break; + case T4_ERR_TBIT: + *layer_type = LAYER_DDP|DDP_LOCAL_CATA; + *ecode = 0; + break; + case T4_ERR_MO: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_MO; + break; + default: + *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; + *ecode = 0; + break; + } +} + +static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, + gfp_t gfp) +{ + struct fw_ri_wr *wqe; + struct sk_buff *skb; + struct terminate_message *term; + + PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, + qhp->ep->hwtid); + + skb = alloc_skb(sizeof *wqe, gfp); + if (!skb) + return; + set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); + + wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); + memset(wqe, 0, sizeof *wqe); + wqe->op_compl = cpu_to_be32(FW_WR_OP(FW_RI_INIT_WR)); + wqe->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID(qhp->ep->hwtid) | + FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); + + wqe->u.terminate.type = FW_RI_TYPE_TERMINATE; + wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term); + term = (struct terminate_message *)wqe->u.terminate.termmsg; + if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) { + term->layer_etype = qhp->attr.layer_etype; + term->ecode = qhp->attr.ecode; + } else + build_term_codes(err_cqe, &term->layer_etype, &term->ecode); + c4iw_ofld_send(&qhp->rhp->rdev, skb); +} + +/* + * Assumes qhp lock is held. + */ +static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, + struct c4iw_cq *schp) +{ + int count; + int flushed; + unsigned long flag; + + PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); + + /* locking hierarchy: cq lock first, then qp lock. */ + spin_lock_irqsave(&rchp->lock, flag); + spin_lock(&qhp->lock); + c4iw_flush_hw_cq(&rchp->cq); + c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); + flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&rchp->lock, flag); + if (flushed) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } + + /* locking hierarchy: cq lock first, then qp lock. */ + spin_lock_irqsave(&schp->lock, flag); + spin_lock(&qhp->lock); + c4iw_flush_hw_cq(&schp->cq); + c4iw_count_scqes(&schp->cq, &qhp->wq, &count); + flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count); + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&schp->lock, flag); + if (flushed) { + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } +} + +static void flush_qp(struct c4iw_qp *qhp) +{ + struct c4iw_cq *rchp, *schp; + unsigned long flag; + + rchp = get_chp(qhp->rhp, qhp->attr.rcq); + schp = get_chp(qhp->rhp, qhp->attr.scq); + + if (qhp->ibqp.uobject) { + t4_set_wq_in_error(&qhp->wq); + t4_set_cq_in_error(&rchp->cq); + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + if (schp != rchp) { + t4_set_cq_in_error(&schp->cq); + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } + return; + } + __flush_qp(qhp, rchp, schp); +} + +static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, + struct c4iw_ep *ep) +{ + struct fw_ri_wr *wqe; + int ret; + struct sk_buff *skb; + + PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, + ep->hwtid); + + skb = alloc_skb(sizeof *wqe, GFP_KERNEL); + if (!skb) + return -ENOMEM; + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + + wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); + memset(wqe, 0, sizeof *wqe); + wqe->op_compl = cpu_to_be32( + FW_WR_OP(FW_RI_INIT_WR) | + FW_WR_COMPL(1)); + wqe->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID(ep->hwtid) | + FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); + wqe->cookie = (unsigned long) &ep->com.wr_wait; + + wqe->u.fini.type = FW_RI_TYPE_FINI; + ret = c4iw_ofld_send(&rhp->rdev, skb); + if (ret) + goto out; + + ret = c4iw_wait_for_reply(&rhp->rdev, &ep->com.wr_wait, qhp->ep->hwtid, + qhp->wq.sq.qid, __func__); +out: + PDBG("%s ret %d\n", __func__, ret); + return ret; +} + +static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init) +{ + PDBG("%s p2p_type = %d\n", __func__, p2p_type); + memset(&init->u, 0, sizeof init->u); + switch (p2p_type) { + case FW_RI_INIT_P2PTYPE_RDMA_WRITE: + init->u.write.opcode = FW_RI_RDMA_WRITE_WR; + init->u.write.stag_sink = cpu_to_be32(1); + init->u.write.to_sink = cpu_to_be64(1); + init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD; + init->u.write.len16 = DIV_ROUND_UP(sizeof init->u.write + + sizeof(struct fw_ri_immd), + 16); + break; + case FW_RI_INIT_P2PTYPE_READ_REQ: + init->u.write.opcode = FW_RI_RDMA_READ_WR; + init->u.read.stag_src = cpu_to_be32(1); + init->u.read.to_src_lo = cpu_to_be32(1); + init->u.read.stag_sink = cpu_to_be32(1); + init->u.read.to_sink_lo = cpu_to_be32(1); + init->u.read.len16 = DIV_ROUND_UP(sizeof init->u.read, 16); + break; + } +} + +static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) +{ + struct fw_ri_wr *wqe; + int ret; + struct sk_buff *skb; + + PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, + qhp->ep->hwtid); + + skb = alloc_skb(sizeof *wqe, GFP_KERNEL); + if (!skb) + return -ENOMEM; + set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); + + wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); + memset(wqe, 0, sizeof *wqe); + wqe->op_compl = cpu_to_be32( + FW_WR_OP(FW_RI_INIT_WR) | + FW_WR_COMPL(1)); + wqe->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID(qhp->ep->hwtid) | + FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); + + wqe->cookie = (unsigned long) &qhp->ep->com.wr_wait; + + wqe->u.init.type = FW_RI_TYPE_INIT; + wqe->u.init.mpareqbit_p2ptype = + V_FW_RI_WR_MPAREQBIT(qhp->attr.mpa_attr.initiator) | + V_FW_RI_WR_P2PTYPE(qhp->attr.mpa_attr.p2p_type); + wqe->u.init.mpa_attrs = FW_RI_MPA_IETF_ENABLE; + if (qhp->attr.mpa_attr.recv_marker_enabled) + wqe->u.init.mpa_attrs |= FW_RI_MPA_RX_MARKER_ENABLE; + if (qhp->attr.mpa_attr.xmit_marker_enabled) + wqe->u.init.mpa_attrs |= FW_RI_MPA_TX_MARKER_ENABLE; + if (qhp->attr.mpa_attr.crc_enabled) + wqe->u.init.mpa_attrs |= FW_RI_MPA_CRC_ENABLE; + + wqe->u.init.qp_caps = FW_RI_QP_RDMA_READ_ENABLE | + FW_RI_QP_RDMA_WRITE_ENABLE | + FW_RI_QP_BIND_ENABLE; + if (!qhp->ibqp.uobject) + wqe->u.init.qp_caps |= FW_RI_QP_FAST_REGISTER_ENABLE | + FW_RI_QP_STAG0_ENABLE; + wqe->u.init.nrqe = cpu_to_be16(t4_rqes_posted(&qhp->wq)); + wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd); + wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid); + wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid); + wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid); + wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq); + wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq); + wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord); + wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird); + wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq); + wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq); + wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size); + wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr - + rhp->rdev.lldi.vr->rq.start); + if (qhp->attr.mpa_attr.initiator) + build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init); + + ret = c4iw_ofld_send(&rhp->rdev, skb); + if (ret) + goto out; + + ret = c4iw_wait_for_reply(&rhp->rdev, &qhp->ep->com.wr_wait, + qhp->ep->hwtid, qhp->wq.sq.qid, __func__); +out: + PDBG("%s ret %d\n", __func__, ret); + return ret; +} + +int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, + enum c4iw_qp_attr_mask mask, + struct c4iw_qp_attributes *attrs, + int internal) +{ + int ret = 0; + struct c4iw_qp_attributes newattr = qhp->attr; + int disconnect = 0; + int terminate = 0; + int abort = 0; + int free = 0; + struct c4iw_ep *ep = NULL; + + PDBG("%s qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n", __func__, + qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state, + (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1); + + mutex_lock(&qhp->mutex); + + /* Process attr changes if in IDLE */ + if (mask & C4IW_QP_ATTR_VALID_MODIFY) { + if (qhp->attr.state != C4IW_QP_STATE_IDLE) { + ret = -EIO; + goto out; + } + if (mask & C4IW_QP_ATTR_ENABLE_RDMA_READ) + newattr.enable_rdma_read = attrs->enable_rdma_read; + if (mask & C4IW_QP_ATTR_ENABLE_RDMA_WRITE) + newattr.enable_rdma_write = attrs->enable_rdma_write; + if (mask & C4IW_QP_ATTR_ENABLE_RDMA_BIND) + newattr.enable_bind = attrs->enable_bind; + if (mask & C4IW_QP_ATTR_MAX_ORD) { + if (attrs->max_ord > c4iw_max_read_depth) { + ret = -EINVAL; + goto out; + } + newattr.max_ord = attrs->max_ord; + } + if (mask & C4IW_QP_ATTR_MAX_IRD) { + if (attrs->max_ird > c4iw_max_read_depth) { + ret = -EINVAL; + goto out; + } + newattr.max_ird = attrs->max_ird; + } + qhp->attr = newattr; + } + + if (!(mask & C4IW_QP_ATTR_NEXT_STATE)) + goto out; + if (qhp->attr.state == attrs->next_state) + goto out; + + switch (qhp->attr.state) { + case C4IW_QP_STATE_IDLE: + switch (attrs->next_state) { + case C4IW_QP_STATE_RTS: + if (!(mask & C4IW_QP_ATTR_LLP_STREAM_HANDLE)) { + ret = -EINVAL; + goto out; + } + if (!(mask & C4IW_QP_ATTR_MPA_ATTR)) { + ret = -EINVAL; + goto out; + } + qhp->attr.mpa_attr = attrs->mpa_attr; + qhp->attr.llp_stream_handle = attrs->llp_stream_handle; + qhp->ep = qhp->attr.llp_stream_handle; + set_state(qhp, C4IW_QP_STATE_RTS); + + /* + * Ref the endpoint here and deref when we + * disassociate the endpoint from the QP. This + * happens in CLOSING->IDLE transition or *->ERROR + * transition. + */ + c4iw_get_ep(&qhp->ep->com); + ret = rdma_init(rhp, qhp); + if (ret) + goto err; + break; + case C4IW_QP_STATE_ERROR: + set_state(qhp, C4IW_QP_STATE_ERROR); + flush_qp(qhp); + break; + default: + ret = -EINVAL; + goto out; + } + break; + case C4IW_QP_STATE_RTS: + switch (attrs->next_state) { + case C4IW_QP_STATE_CLOSING: + BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); + set_state(qhp, C4IW_QP_STATE_CLOSING); + ep = qhp->ep; + if (!internal) { + abort = 0; + disconnect = 1; + c4iw_get_ep(&qhp->ep->com); + } + if (qhp->ibqp.uobject) + t4_set_wq_in_error(&qhp->wq); + ret = rdma_fini(rhp, qhp, ep); + if (ret) + goto err; + break; + case C4IW_QP_STATE_TERMINATE: + set_state(qhp, C4IW_QP_STATE_TERMINATE); + qhp->attr.layer_etype = attrs->layer_etype; + qhp->attr.ecode = attrs->ecode; + if (qhp->ibqp.uobject) + t4_set_wq_in_error(&qhp->wq); + ep = qhp->ep; + if (!internal) + terminate = 1; + disconnect = 1; + c4iw_get_ep(&qhp->ep->com); + break; + case C4IW_QP_STATE_ERROR: + set_state(qhp, C4IW_QP_STATE_ERROR); + if (qhp->ibqp.uobject) + t4_set_wq_in_error(&qhp->wq); + if (!internal) { + abort = 1; + disconnect = 1; + ep = qhp->ep; + c4iw_get_ep(&qhp->ep->com); + } + goto err; + break; + default: + ret = -EINVAL; + goto out; + } + break; + case C4IW_QP_STATE_CLOSING: + if (!internal) { + ret = -EINVAL; + goto out; + } + switch (attrs->next_state) { + case C4IW_QP_STATE_IDLE: + flush_qp(qhp); + set_state(qhp, C4IW_QP_STATE_IDLE); + qhp->attr.llp_stream_handle = NULL; + c4iw_put_ep(&qhp->ep->com); + qhp->ep = NULL; + wake_up(&qhp->wait); + break; + case C4IW_QP_STATE_ERROR: + goto err; + default: + ret = -EINVAL; + goto err; + } + break; + case C4IW_QP_STATE_ERROR: + if (attrs->next_state != C4IW_QP_STATE_IDLE) { + ret = -EINVAL; + goto out; + } + if (!t4_sq_empty(&qhp->wq) || !t4_rq_empty(&qhp->wq)) { + ret = -EINVAL; + goto out; + } + set_state(qhp, C4IW_QP_STATE_IDLE); + break; + case C4IW_QP_STATE_TERMINATE: + if (!internal) { + ret = -EINVAL; + goto out; + } + goto err; + break; + default: + printk(KERN_ERR "%s in a bad state %d\n", + __func__, qhp->attr.state); + ret = -EINVAL; + goto err; + break; + } + goto out; +err: + PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep, + qhp->wq.sq.qid); + + /* disassociate the LLP connection */ + qhp->attr.llp_stream_handle = NULL; + if (!ep) + ep = qhp->ep; + qhp->ep = NULL; + set_state(qhp, C4IW_QP_STATE_ERROR); + free = 1; + wake_up(&qhp->wait); + BUG_ON(!ep); + flush_qp(qhp); +out: + mutex_unlock(&qhp->mutex); + + if (terminate) + post_terminate(qhp, NULL, internal ? GFP_ATOMIC : GFP_KERNEL); + + /* + * If disconnect is 1, then we need to initiate a disconnect + * on the EP. This can be a normal close (RTS->CLOSING) or + * an abnormal close (RTS/CLOSING->ERROR). + */ + if (disconnect) { + c4iw_ep_disconnect(ep, abort, internal ? GFP_ATOMIC : + GFP_KERNEL); + c4iw_put_ep(&ep->com); + } + + /* + * If free is 1, then we've disassociated the EP from the QP + * and we need to dereference the EP. + */ + if (free) + c4iw_put_ep(&ep->com); + PDBG("%s exit state %d\n", __func__, qhp->attr.state); + return ret; +} + +int c4iw_destroy_qp(struct ib_qp *ib_qp) +{ + struct c4iw_dev *rhp; + struct c4iw_qp *qhp; + struct c4iw_qp_attributes attrs; + struct c4iw_ucontext *ucontext; + + qhp = to_c4iw_qp(ib_qp); + rhp = qhp->rhp; + + attrs.next_state = C4IW_QP_STATE_ERROR; + if (qhp->attr.state == C4IW_QP_STATE_TERMINATE) + c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + else + c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + wait_event(qhp->wait, !qhp->ep); + + remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); + atomic_dec(&qhp->refcnt); + wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); + + ucontext = ib_qp->uobject ? + to_c4iw_ucontext(ib_qp->uobject->context) : NULL; + destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + + PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid); + kfree(qhp); + return 0; +} + +struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, + struct ib_udata *udata) +{ + struct c4iw_dev *rhp; + struct c4iw_qp *qhp; + struct c4iw_pd *php; + struct c4iw_cq *schp; + struct c4iw_cq *rchp; + struct c4iw_create_qp_resp uresp; + int sqsize, rqsize; + struct c4iw_ucontext *ucontext; + int ret; + struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL; + + PDBG("%s ib_pd %p\n", __func__, pd); + + if (attrs->qp_type != IB_QPT_RC) + return ERR_PTR(-EINVAL); + + php = to_c4iw_pd(pd); + rhp = php->rhp; + schp = get_chp(rhp, ((struct c4iw_cq *)attrs->send_cq)->cq.cqid); + rchp = get_chp(rhp, ((struct c4iw_cq *)attrs->recv_cq)->cq.cqid); + if (!schp || !rchp) + return ERR_PTR(-EINVAL); + + if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE) + return ERR_PTR(-EINVAL); + + rqsize = roundup(attrs->cap.max_recv_wr + 1, 16); + if (rqsize > T4_MAX_RQ_SIZE) + return ERR_PTR(-E2BIG); + + sqsize = roundup(attrs->cap.max_send_wr + 1, 16); + if (sqsize > T4_MAX_SQ_SIZE) + return ERR_PTR(-E2BIG); + + ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; + + + qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); + if (!qhp) + return ERR_PTR(-ENOMEM); + qhp->wq.sq.size = sqsize; + qhp->wq.sq.memsize = (sqsize + 1) * sizeof *qhp->wq.sq.queue; + qhp->wq.rq.size = rqsize; + qhp->wq.rq.memsize = (rqsize + 1) * sizeof *qhp->wq.rq.queue; + + if (ucontext) { + qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE); + qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE); + } + + PDBG("%s sqsize %u sqmemsize %zu rqsize %u rqmemsize %zu\n", + __func__, sqsize, qhp->wq.sq.memsize, rqsize, qhp->wq.rq.memsize); + + ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + if (ret) + goto err1; + + attrs->cap.max_recv_wr = rqsize - 1; + attrs->cap.max_send_wr = sqsize - 1; + attrs->cap.max_inline_data = T4_MAX_SEND_INLINE; + + qhp->rhp = rhp; + qhp->attr.pd = php->pdid; + qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid; + qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid; + qhp->attr.sq_num_entries = attrs->cap.max_send_wr; + qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; + qhp->attr.sq_max_sges = attrs->cap.max_send_sge; + qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; + qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; + qhp->attr.state = C4IW_QP_STATE_IDLE; + qhp->attr.next_state = C4IW_QP_STATE_IDLE; + qhp->attr.enable_rdma_read = 1; + qhp->attr.enable_rdma_write = 1; + qhp->attr.enable_bind = 1; + qhp->attr.max_ord = 1; + qhp->attr.max_ird = 1; + spin_lock_init(&qhp->lock); + mutex_init(&qhp->mutex); + init_waitqueue_head(&qhp->wait); + atomic_set(&qhp->refcnt, 1); + + ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); + if (ret) + goto err2; + + if (udata) { + mm1 = kmalloc(sizeof *mm1, GFP_KERNEL); + if (!mm1) { + ret = -ENOMEM; + goto err3; + } + mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); + if (!mm2) { + ret = -ENOMEM; + goto err4; + } + mm3 = kmalloc(sizeof *mm3, GFP_KERNEL); + if (!mm3) { + ret = -ENOMEM; + goto err5; + } + mm4 = kmalloc(sizeof *mm4, GFP_KERNEL); + if (!mm4) { + ret = -ENOMEM; + goto err6; + } + if (t4_sq_onchip(&qhp->wq.sq)) { + mm5 = kmalloc(sizeof *mm5, GFP_KERNEL); + if (!mm5) { + ret = -ENOMEM; + goto err7; + } + uresp.flags = C4IW_QPF_ONCHIP; + } else + uresp.flags = 0; + uresp.qid_mask = rhp->rdev.qpmask; + uresp.sqid = qhp->wq.sq.qid; + uresp.sq_size = qhp->wq.sq.size; + uresp.sq_memsize = qhp->wq.sq.memsize; + uresp.rqid = qhp->wq.rq.qid; + uresp.rq_size = qhp->wq.rq.size; + uresp.rq_memsize = qhp->wq.rq.memsize; + spin_lock(&ucontext->mmap_lock); + if (mm5) { + uresp.ma_sync_key = ucontext->key; + ucontext->key += PAGE_SIZE; + } + uresp.sq_key = ucontext->key; + ucontext->key += PAGE_SIZE; + uresp.rq_key = ucontext->key; + ucontext->key += PAGE_SIZE; + uresp.sq_db_gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + uresp.rq_db_gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + spin_unlock(&ucontext->mmap_lock); + ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); + if (ret) + goto err8; + mm1->key = uresp.sq_key; + mm1->addr = qhp->wq.sq.phys_addr; + mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize); + insert_mmap(ucontext, mm1); + mm2->key = uresp.rq_key; + mm2->addr = virt_to_phys(qhp->wq.rq.queue); + mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); + insert_mmap(ucontext, mm2); + mm3->key = uresp.sq_db_gts_key; + mm3->addr = qhp->wq.sq.udb; + mm3->len = PAGE_SIZE; + insert_mmap(ucontext, mm3); + mm4->key = uresp.rq_db_gts_key; + mm4->addr = qhp->wq.rq.udb; + mm4->len = PAGE_SIZE; + insert_mmap(ucontext, mm4); + if (mm5) { + mm5->key = uresp.ma_sync_key; + mm5->addr = (pci_resource_start(rhp->rdev.lldi.pdev, 0) + + A_PCIE_MA_SYNC) & PAGE_MASK; + mm5->len = PAGE_SIZE; + insert_mmap(ucontext, mm5); + } + } + qhp->ibqp.qp_num = qhp->wq.sq.qid; + init_timer(&(qhp->timer)); + PDBG("%s qhp %p sq_num_entries %d, rq_num_entries %d qpid 0x%0x\n", + __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, + qhp->wq.sq.qid); + return &qhp->ibqp; +err8: + kfree(mm5); +err7: + kfree(mm4); +err6: + kfree(mm3); +err5: + kfree(mm2); +err4: + kfree(mm1); +err3: + remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); +err2: + destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx); +err1: + kfree(qhp); + return ERR_PTR(ret); +} + +int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct c4iw_dev *rhp; + struct c4iw_qp *qhp; + enum c4iw_qp_attr_mask mask = 0; + struct c4iw_qp_attributes attrs; + + PDBG("%s ib_qp %p\n", __func__, ibqp); + + /* iwarp does not support the RTR state */ + if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) + attr_mask &= ~IB_QP_STATE; + + /* Make sure we still have something left to do */ + if (!attr_mask) + return 0; + + memset(&attrs, 0, sizeof attrs); + qhp = to_c4iw_qp(ibqp); + rhp = qhp->rhp; + + attrs.next_state = c4iw_convert_state(attr->qp_state); + attrs.enable_rdma_read = (attr->qp_access_flags & + IB_ACCESS_REMOTE_READ) ? 1 : 0; + attrs.enable_rdma_write = (attr->qp_access_flags & + IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0; + + + mask |= (attr_mask & IB_QP_STATE) ? C4IW_QP_ATTR_NEXT_STATE : 0; + mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ? + (C4IW_QP_ATTR_ENABLE_RDMA_READ | + C4IW_QP_ATTR_ENABLE_RDMA_WRITE | + C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0; + + return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0); +} + +struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn) +{ + PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn); + return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn); +} diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c new file mode 100644 index 00000000..407ff392 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -0,0 +1,481 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +/* Crude resource management */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "iw_cxgb4.h" + +#define RANDOM_SIZE 16 + +static int __c4iw_init_resource_fifo(struct kfifo *fifo, + spinlock_t *fifo_lock, + u32 nr, u32 skip_low, + u32 skip_high, + int random) +{ + u32 i, j, entry = 0, idx; + u32 random_bytes; + u32 rarray[16]; + spin_lock_init(fifo_lock); + + if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL)) + return -ENOMEM; + + for (i = 0; i < skip_low + skip_high; i++) + kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32)); + if (random) { + j = 0; + random_bytes = random32(); + for (i = 0; i < RANDOM_SIZE; i++) + rarray[i] = i + skip_low; + for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) { + if (j >= RANDOM_SIZE) { + j = 0; + random_bytes = random32(); + } + idx = (random_bytes >> (j * 2)) & 0xF; + kfifo_in(fifo, + (unsigned char *) &rarray[idx], + sizeof(u32)); + rarray[idx] = i; + j++; + } + for (i = 0; i < RANDOM_SIZE; i++) + kfifo_in(fifo, + (unsigned char *) &rarray[i], + sizeof(u32)); + } else + for (i = skip_low; i < nr - skip_high; i++) + kfifo_in(fifo, (unsigned char *) &i, sizeof(u32)); + + for (i = 0; i < skip_low + skip_high; i++) + if (kfifo_out_locked(fifo, (unsigned char *) &entry, + sizeof(u32), fifo_lock)) + break; + return 0; +} + +static int c4iw_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock, + u32 nr, u32 skip_low, u32 skip_high) +{ + return __c4iw_init_resource_fifo(fifo, fifo_lock, nr, skip_low, + skip_high, 0); +} + +static int c4iw_init_resource_fifo_random(struct kfifo *fifo, + spinlock_t *fifo_lock, + u32 nr, u32 skip_low, u32 skip_high) +{ + return __c4iw_init_resource_fifo(fifo, fifo_lock, nr, skip_low, + skip_high, 1); +} + +static int c4iw_init_qid_fifo(struct c4iw_rdev *rdev) +{ + u32 i; + + spin_lock_init(&rdev->resource.qid_fifo_lock); + + if (kfifo_alloc(&rdev->resource.qid_fifo, rdev->lldi.vr->qp.size * + sizeof(u32), GFP_KERNEL)) + return -ENOMEM; + + for (i = rdev->lldi.vr->qp.start; + i < rdev->lldi.vr->qp.start + rdev->lldi.vr->qp.size; i++) + if (!(i & rdev->qpmask)) + kfifo_in(&rdev->resource.qid_fifo, + (unsigned char *) &i, sizeof(u32)); + return 0; +} + +/* nr_* must be power of 2 */ +int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid) +{ + int err = 0; + err = c4iw_init_resource_fifo_random(&rdev->resource.tpt_fifo, + &rdev->resource.tpt_fifo_lock, + nr_tpt, 1, 0); + if (err) + goto tpt_err; + err = c4iw_init_qid_fifo(rdev); + if (err) + goto qid_err; + err = c4iw_init_resource_fifo(&rdev->resource.pdid_fifo, + &rdev->resource.pdid_fifo_lock, + nr_pdid, 1, 0); + if (err) + goto pdid_err; + return 0; +pdid_err: + kfifo_free(&rdev->resource.qid_fifo); +qid_err: + kfifo_free(&rdev->resource.tpt_fifo); +tpt_err: + return -ENOMEM; +} + +/* + * returns 0 if no resource available + */ +u32 c4iw_get_resource(struct kfifo *fifo, spinlock_t *lock) +{ + u32 entry; + if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) + return entry; + else + return 0; +} + +void c4iw_put_resource(struct kfifo *fifo, u32 entry, spinlock_t *lock) +{ + PDBG("%s entry 0x%x\n", __func__, entry); + kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock); +} + +u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) +{ + struct c4iw_qid_list *entry; + u32 qid; + int i; + + mutex_lock(&uctx->lock); + if (!list_empty(&uctx->cqids)) { + entry = list_entry(uctx->cqids.next, struct c4iw_qid_list, + entry); + list_del(&entry->entry); + qid = entry->qid; + kfree(entry); + } else { + qid = c4iw_get_resource(&rdev->resource.qid_fifo, + &rdev->resource.qid_fifo_lock); + if (!qid) + goto out; + for (i = qid+1; i & rdev->qpmask; i++) { + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + goto out; + entry->qid = i; + list_add_tail(&entry->entry, &uctx->cqids); + } + + /* + * now put the same ids on the qp list since they all + * map to the same db/gts page. + */ + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + goto out; + entry->qid = qid; + list_add_tail(&entry->entry, &uctx->qpids); + for (i = qid+1; i & rdev->qpmask; i++) { + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + goto out; + entry->qid = i; + list_add_tail(&entry->entry, &uctx->qpids); + } + } +out: + mutex_unlock(&uctx->lock); + PDBG("%s qid 0x%x\n", __func__, qid); + return qid; +} + +void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid, + struct c4iw_dev_ucontext *uctx) +{ + struct c4iw_qid_list *entry; + + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + return; + PDBG("%s qid 0x%x\n", __func__, qid); + entry->qid = qid; + mutex_lock(&uctx->lock); + list_add_tail(&entry->entry, &uctx->cqids); + mutex_unlock(&uctx->lock); +} + +u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) +{ + struct c4iw_qid_list *entry; + u32 qid; + int i; + + mutex_lock(&uctx->lock); + if (!list_empty(&uctx->qpids)) { + entry = list_entry(uctx->qpids.next, struct c4iw_qid_list, + entry); + list_del(&entry->entry); + qid = entry->qid; + kfree(entry); + } else { + qid = c4iw_get_resource(&rdev->resource.qid_fifo, + &rdev->resource.qid_fifo_lock); + if (!qid) + goto out; + for (i = qid+1; i & rdev->qpmask; i++) { + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + goto out; + entry->qid = i; + list_add_tail(&entry->entry, &uctx->qpids); + } + + /* + * now put the same ids on the cq list since they all + * map to the same db/gts page. + */ + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + goto out; + entry->qid = qid; + list_add_tail(&entry->entry, &uctx->cqids); + for (i = qid; i & rdev->qpmask; i++) { + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + goto out; + entry->qid = i; + list_add_tail(&entry->entry, &uctx->cqids); + } + } +out: + mutex_unlock(&uctx->lock); + PDBG("%s qid 0x%x\n", __func__, qid); + return qid; +} + +void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid, + struct c4iw_dev_ucontext *uctx) +{ + struct c4iw_qid_list *entry; + + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + return; + PDBG("%s qid 0x%x\n", __func__, qid); + entry->qid = qid; + mutex_lock(&uctx->lock); + list_add_tail(&entry->entry, &uctx->qpids); + mutex_unlock(&uctx->lock); +} + +void c4iw_destroy_resource(struct c4iw_resource *rscp) +{ + kfifo_free(&rscp->tpt_fifo); + kfifo_free(&rscp->qid_fifo); + kfifo_free(&rscp->pdid_fifo); +} + +/* + * PBL Memory Manager. Uses Linux generic allocator. + */ + +#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */ + +u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size) +{ + unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size); + PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); + if (!addr) + printk_ratelimited(KERN_WARNING MOD "%s: Out of PBL memory\n", + pci_name(rdev->lldi.pdev)); + return (u32)addr; +} + +void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size) +{ + PDBG("%s addr 0x%x size %d\n", __func__, addr, size); + gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size); +} + +int c4iw_pblpool_create(struct c4iw_rdev *rdev) +{ + unsigned pbl_start, pbl_chunk, pbl_top; + + rdev->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1); + if (!rdev->pbl_pool) + return -ENOMEM; + + pbl_start = rdev->lldi.vr->pbl.start; + pbl_chunk = rdev->lldi.vr->pbl.size; + pbl_top = pbl_start + pbl_chunk; + + while (pbl_start < pbl_top) { + pbl_chunk = min(pbl_top - pbl_start + 1, pbl_chunk); + if (gen_pool_add(rdev->pbl_pool, pbl_start, pbl_chunk, -1)) { + PDBG("%s failed to add PBL chunk (%x/%x)\n", + __func__, pbl_start, pbl_chunk); + if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) { + printk(KERN_WARNING MOD + "Failed to add all PBL chunks (%x/%x)\n", + pbl_start, + pbl_top - pbl_start); + return 0; + } + pbl_chunk >>= 1; + } else { + PDBG("%s added PBL chunk (%x/%x)\n", + __func__, pbl_start, pbl_chunk); + pbl_start += pbl_chunk; + } + } + + return 0; +} + +void c4iw_pblpool_destroy(struct c4iw_rdev *rdev) +{ + gen_pool_destroy(rdev->pbl_pool); +} + +/* + * RQT Memory Manager. Uses Linux generic allocator. + */ + +#define MIN_RQT_SHIFT 10 /* 1KB == min RQT size (16 entries) */ + +u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) +{ + unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6); + PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6); + if (!addr) + printk_ratelimited(KERN_WARNING MOD "%s: Out of RQT memory\n", + pci_name(rdev->lldi.pdev)); + return (u32)addr; +} + +void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) +{ + PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6); + gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6); +} + +int c4iw_rqtpool_create(struct c4iw_rdev *rdev) +{ + unsigned rqt_start, rqt_chunk, rqt_top; + + rdev->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); + if (!rdev->rqt_pool) + return -ENOMEM; + + rqt_start = rdev->lldi.vr->rq.start; + rqt_chunk = rdev->lldi.vr->rq.size; + rqt_top = rqt_start + rqt_chunk; + + while (rqt_start < rqt_top) { + rqt_chunk = min(rqt_top - rqt_start + 1, rqt_chunk); + if (gen_pool_add(rdev->rqt_pool, rqt_start, rqt_chunk, -1)) { + PDBG("%s failed to add RQT chunk (%x/%x)\n", + __func__, rqt_start, rqt_chunk); + if (rqt_chunk <= 1024 << MIN_RQT_SHIFT) { + printk(KERN_WARNING MOD + "Failed to add all RQT chunks (%x/%x)\n", + rqt_start, rqt_top - rqt_start); + return 0; + } + rqt_chunk >>= 1; + } else { + PDBG("%s added RQT chunk (%x/%x)\n", + __func__, rqt_start, rqt_chunk); + rqt_start += rqt_chunk; + } + } + return 0; +} + +void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev) +{ + gen_pool_destroy(rdev->rqt_pool); +} + +/* + * On-Chip QP Memory. + */ +#define MIN_OCQP_SHIFT 12 /* 4KB == min ocqp size */ + +u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size) +{ + unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size); + PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); + return (u32)addr; +} + +void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size) +{ + PDBG("%s addr 0x%x size %d\n", __func__, addr, size); + gen_pool_free(rdev->ocqp_pool, (unsigned long)addr, size); +} + +int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev) +{ + unsigned start, chunk, top; + + rdev->ocqp_pool = gen_pool_create(MIN_OCQP_SHIFT, -1); + if (!rdev->ocqp_pool) + return -ENOMEM; + + start = rdev->lldi.vr->ocq.start; + chunk = rdev->lldi.vr->ocq.size; + top = start + chunk; + + while (start < top) { + chunk = min(top - start + 1, chunk); + if (gen_pool_add(rdev->ocqp_pool, start, chunk, -1)) { + PDBG("%s failed to add OCQP chunk (%x/%x)\n", + __func__, start, chunk); + if (chunk <= 1024 << MIN_OCQP_SHIFT) { + printk(KERN_WARNING MOD + "Failed to add all OCQP chunks (%x/%x)\n", + start, top - start); + return 0; + } + chunk >>= 1; + } else { + PDBG("%s added OCQP chunk (%x/%x)\n", + __func__, start, chunk); + start += chunk; + } + } + return 0; +} + +void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev) +{ + gen_pool_destroy(rdev->ocqp_pool); +} diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h new file mode 100644 index 00000000..c0221eec --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -0,0 +1,577 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __T4_H__ +#define __T4_H__ + +#include "t4_hw.h" +#include "t4_regs.h" +#include "t4_msg.h" +#include "t4fw_ri_api.h" + +#define T4_MAX_NUM_QP (1<<16) +#define T4_MAX_NUM_CQ (1<<15) +#define T4_MAX_NUM_PD (1<<15) +#define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1) +#define T4_MAX_EQ_SIZE (65520 - T4_EQ_STATUS_ENTRIES) +#define T4_MAX_IQ_SIZE (65520 - 1) +#define T4_MAX_RQ_SIZE (8192 - T4_EQ_STATUS_ENTRIES) +#define T4_MAX_SQ_SIZE (T4_MAX_EQ_SIZE - 1) +#define T4_MAX_QP_DEPTH (T4_MAX_RQ_SIZE - 1) +#define T4_MAX_CQ_DEPTH (T4_MAX_IQ_SIZE - 1) +#define T4_MAX_NUM_STAG (1<<15) +#define T4_MAX_MR_SIZE (~0ULL - 1) +#define T4_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ +#define T4_STAG_UNSET 0xffffffff +#define T4_FW_MAJ 0 +#define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1) +#define A_PCIE_MA_SYNC 0x30b4 + +struct t4_status_page { + __be32 rsvd1; /* flit 0 - hw owns */ + __be16 rsvd2; + __be16 qid; + __be16 cidx; + __be16 pidx; + u8 qp_err; /* flit 1 - sw owns */ + u8 db_off; +}; + +#define T4_EQ_ENTRY_SIZE 64 + +#define T4_SQ_NUM_SLOTS 5 +#define T4_SQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_SQ_NUM_SLOTS) +#define T4_MAX_SEND_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \ + sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) +#define T4_MAX_SEND_INLINE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \ + sizeof(struct fw_ri_immd))) +#define T4_MAX_WRITE_INLINE ((T4_SQ_NUM_BYTES - \ + sizeof(struct fw_ri_rdma_write_wr) - \ + sizeof(struct fw_ri_immd))) +#define T4_MAX_WRITE_SGE ((T4_SQ_NUM_BYTES - \ + sizeof(struct fw_ri_rdma_write_wr) - \ + sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) +#define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \ + sizeof(struct fw_ri_immd)) & ~31UL) +#define T4_MAX_FR_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) + +#define T4_RQ_NUM_SLOTS 2 +#define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) +#define T4_MAX_RECV_SGE 4 + +union t4_wr { + struct fw_ri_res_wr res; + struct fw_ri_wr ri; + struct fw_ri_rdma_write_wr write; + struct fw_ri_send_wr send; + struct fw_ri_rdma_read_wr read; + struct fw_ri_bind_mw_wr bind; + struct fw_ri_fr_nsmr_wr fr; + struct fw_ri_inv_lstag_wr inv; + struct t4_status_page status; + __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; +}; + +union t4_recv_wr { + struct fw_ri_recv_wr recv; + struct t4_status_page status; + __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS]; +}; + +static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid, + enum fw_wr_opcodes opcode, u8 flags, u8 len16) +{ + wqe->send.opcode = (u8)opcode; + wqe->send.flags = flags; + wqe->send.wrid = wrid; + wqe->send.r1[0] = 0; + wqe->send.r1[1] = 0; + wqe->send.r1[2] = 0; + wqe->send.len16 = len16; +} + +/* CQE/AE status codes */ +#define T4_ERR_SUCCESS 0x0 +#define T4_ERR_STAG 0x1 /* STAG invalid: either the */ + /* STAG is offlimt, being 0, */ + /* or STAG_key mismatch */ +#define T4_ERR_PDID 0x2 /* PDID mismatch */ +#define T4_ERR_QPID 0x3 /* QPID mismatch */ +#define T4_ERR_ACCESS 0x4 /* Invalid access right */ +#define T4_ERR_WRAP 0x5 /* Wrap error */ +#define T4_ERR_BOUND 0x6 /* base and bounds voilation */ +#define T4_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */ + /* shared memory region */ +#define T4_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */ + /* shared memory region */ +#define T4_ERR_ECC 0x9 /* ECC error detected */ +#define T4_ERR_ECC_PSTAG 0xA /* ECC error detected when */ + /* reading PSTAG for a MW */ + /* Invalidate */ +#define T4_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */ + /* software error */ +#define T4_ERR_SWFLUSH 0xC /* SW FLUSHED */ +#define T4_ERR_CRC 0x10 /* CRC error */ +#define T4_ERR_MARKER 0x11 /* Marker error */ +#define T4_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */ +#define T4_ERR_OUT_OF_RQE 0x13 /* out of RQE */ +#define T4_ERR_DDP_VERSION 0x14 /* wrong DDP version */ +#define T4_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */ +#define T4_ERR_OPCODE 0x16 /* invalid rdma opcode */ +#define T4_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */ +#define T4_ERR_MSN 0x18 /* MSN error */ +#define T4_ERR_TBIT 0x19 /* tag bit not set correctly */ +#define T4_ERR_MO 0x1A /* MO not 0 for TERMINATE */ + /* or READ_REQ */ +#define T4_ERR_MSN_GAP 0x1B +#define T4_ERR_MSN_RANGE 0x1C +#define T4_ERR_IRD_OVERFLOW 0x1D +#define T4_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */ + /* software error */ +#define T4_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */ + /* mismatch) */ +/* + * CQE defs + */ +struct t4_cqe { + __be32 header; + __be32 len; + union { + struct { + __be32 stag; + __be32 msn; + } rcqe; + struct { + u32 nada1; + u16 nada2; + u16 cidx; + } scqe; + struct { + __be32 wrid_hi; + __be32 wrid_low; + } gen; + } u; + __be64 reserved; + __be64 bits_type_ts; +}; + +/* macros for flit 0 of the cqe */ + +#define S_CQE_QPID 12 +#define M_CQE_QPID 0xFFFFF +#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID) +#define V_CQE_QPID(x) ((x)<> S_CQE_SWCQE)) & M_CQE_SWCQE) +#define V_CQE_SWCQE(x) ((x)<> S_CQE_STATUS)) & M_CQE_STATUS) +#define V_CQE_STATUS(x) ((x)<> S_CQE_TYPE)) & M_CQE_TYPE) +#define V_CQE_TYPE(x) ((x)<> S_CQE_OPCODE)) & M_CQE_OPCODE) +#define V_CQE_OPCODE(x) ((x)<header))) +#define CQE_QPID(x) (G_CQE_QPID(be32_to_cpu((x)->header))) +#define CQE_TYPE(x) (G_CQE_TYPE(be32_to_cpu((x)->header))) +#define SQ_TYPE(x) (CQE_TYPE((x))) +#define RQ_TYPE(x) (!CQE_TYPE((x))) +#define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x)->header))) +#define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x)->header))) + +#define CQE_SEND_OPCODE(x)( \ + (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND) || \ + (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND_WITH_SE) || \ + (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND_WITH_INV) || \ + (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND_WITH_SE_INV)) + +#define CQE_LEN(x) (be32_to_cpu((x)->len)) + +/* used for RQ completion processing */ +#define CQE_WRID_STAG(x) (be32_to_cpu((x)->u.rcqe.stag)) +#define CQE_WRID_MSN(x) (be32_to_cpu((x)->u.rcqe.msn)) + +/* used for SQ completion processing */ +#define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx) + +/* generic accessor macros */ +#define CQE_WRID_HI(x) ((x)->u.gen.wrid_hi) +#define CQE_WRID_LOW(x) ((x)->u.gen.wrid_low) + +/* macros for flit 3 of the cqe */ +#define S_CQE_GENBIT 63 +#define M_CQE_GENBIT 0x1 +#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT) +#define V_CQE_GENBIT(x) ((x)<> S_CQE_OVFBIT)) & M_CQE_OVFBIT) + +#define S_CQE_IQTYPE 60 +#define M_CQE_IQTYPE 0x3 +#define G_CQE_IQTYPE(x) ((((x) >> S_CQE_IQTYPE)) & M_CQE_IQTYPE) + +#define M_CQE_TS 0x0fffffffffffffffULL +#define G_CQE_TS(x) ((x) & M_CQE_TS) + +#define CQE_OVFBIT(x) ((unsigned)G_CQE_OVFBIT(be64_to_cpu((x)->bits_type_ts))) +#define CQE_GENBIT(x) ((unsigned)G_CQE_GENBIT(be64_to_cpu((x)->bits_type_ts))) +#define CQE_TS(x) (G_CQE_TS(be64_to_cpu((x)->bits_type_ts))) + +struct t4_swsqe { + u64 wr_id; + struct t4_cqe cqe; + int read_len; + int opcode; + int complete; + int signaled; + u16 idx; +}; + +static inline pgprot_t t4_pgprot_wc(pgprot_t prot) +{ +#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) + return pgprot_writecombine(prot); +#else + return pgprot_noncached(prot); +#endif +} + +static inline int t4_ocqp_supported(void) +{ +#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) + return 1; +#else + return 0; +#endif +} + +enum { + T4_SQ_ONCHIP = (1<<0), +}; + +struct t4_sq { + union t4_wr *queue; + dma_addr_t dma_addr; + DEFINE_DMA_UNMAP_ADDR(mapping); + unsigned long phys_addr; + struct t4_swsqe *sw_sq; + struct t4_swsqe *oldest_read; + u64 udb; + size_t memsize; + u32 qid; + u16 in_use; + u16 size; + u16 cidx; + u16 pidx; + u16 wq_pidx; + u16 flags; +}; + +struct t4_swrqe { + u64 wr_id; +}; + +struct t4_rq { + union t4_recv_wr *queue; + dma_addr_t dma_addr; + DEFINE_DMA_UNMAP_ADDR(mapping); + struct t4_swrqe *sw_rq; + u64 udb; + size_t memsize; + u32 qid; + u32 msn; + u32 rqt_hwaddr; + u16 rqt_size; + u16 in_use; + u16 size; + u16 cidx; + u16 pidx; + u16 wq_pidx; +}; + +struct t4_wq { + struct t4_sq sq; + struct t4_rq rq; + void __iomem *db; + void __iomem *gts; + struct c4iw_rdev *rdev; +}; + +static inline int t4_rqes_posted(struct t4_wq *wq) +{ + return wq->rq.in_use; +} + +static inline int t4_rq_empty(struct t4_wq *wq) +{ + return wq->rq.in_use == 0; +} + +static inline int t4_rq_full(struct t4_wq *wq) +{ + return wq->rq.in_use == (wq->rq.size - 1); +} + +static inline u32 t4_rq_avail(struct t4_wq *wq) +{ + return wq->rq.size - 1 - wq->rq.in_use; +} + +static inline void t4_rq_produce(struct t4_wq *wq, u8 len16) +{ + wq->rq.in_use++; + if (++wq->rq.pidx == wq->rq.size) + wq->rq.pidx = 0; + wq->rq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + if (wq->rq.wq_pidx >= wq->rq.size * T4_RQ_NUM_SLOTS) + wq->rq.wq_pidx %= wq->rq.size * T4_RQ_NUM_SLOTS; +} + +static inline void t4_rq_consume(struct t4_wq *wq) +{ + wq->rq.in_use--; + wq->rq.msn++; + if (++wq->rq.cidx == wq->rq.size) + wq->rq.cidx = 0; +} + +static inline int t4_sq_onchip(struct t4_sq *sq) +{ + return sq->flags & T4_SQ_ONCHIP; +} + +static inline int t4_sq_empty(struct t4_wq *wq) +{ + return wq->sq.in_use == 0; +} + +static inline int t4_sq_full(struct t4_wq *wq) +{ + return wq->sq.in_use == (wq->sq.size - 1); +} + +static inline u32 t4_sq_avail(struct t4_wq *wq) +{ + return wq->sq.size - 1 - wq->sq.in_use; +} + +static inline void t4_sq_produce(struct t4_wq *wq, u8 len16) +{ + wq->sq.in_use++; + if (++wq->sq.pidx == wq->sq.size) + wq->sq.pidx = 0; + wq->sq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + if (wq->sq.wq_pidx >= wq->sq.size * T4_SQ_NUM_SLOTS) + wq->sq.wq_pidx %= wq->sq.size * T4_SQ_NUM_SLOTS; +} + +static inline void t4_sq_consume(struct t4_wq *wq) +{ + wq->sq.in_use--; + if (++wq->sq.cidx == wq->sq.size) + wq->sq.cidx = 0; +} + +static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc) +{ + wmb(); + writel(QID(wq->sq.qid) | PIDX(inc), wq->db); +} + +static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc) +{ + wmb(); + writel(QID(wq->rq.qid) | PIDX(inc), wq->db); +} + +static inline int t4_wq_in_error(struct t4_wq *wq) +{ + return wq->rq.queue[wq->rq.size].status.qp_err; +} + +static inline void t4_set_wq_in_error(struct t4_wq *wq) +{ + wq->rq.queue[wq->rq.size].status.qp_err = 1; +} + +static inline void t4_disable_wq_db(struct t4_wq *wq) +{ + wq->rq.queue[wq->rq.size].status.db_off = 1; +} + +static inline void t4_enable_wq_db(struct t4_wq *wq) +{ + wq->rq.queue[wq->rq.size].status.db_off = 0; +} + +static inline int t4_wq_db_enabled(struct t4_wq *wq) +{ + return !wq->rq.queue[wq->rq.size].status.db_off; +} + +struct t4_cq { + struct t4_cqe *queue; + dma_addr_t dma_addr; + DEFINE_DMA_UNMAP_ADDR(mapping); + struct t4_cqe *sw_queue; + void __iomem *gts; + struct c4iw_rdev *rdev; + u64 ugts; + size_t memsize; + __be64 bits_type_ts; + u32 cqid; + u16 size; /* including status page */ + u16 cidx; + u16 sw_pidx; + u16 sw_cidx; + u16 sw_in_use; + u16 cidx_inc; + u8 gen; + u8 error; +}; + +static inline int t4_arm_cq(struct t4_cq *cq, int se) +{ + u32 val; + + while (cq->cidx_inc > CIDXINC_MASK) { + val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7) | + INGRESSQID(cq->cqid); + writel(val, cq->gts); + cq->cidx_inc -= CIDXINC_MASK; + } + val = SEINTARM(se) | CIDXINC(cq->cidx_inc) | TIMERREG(6) | + INGRESSQID(cq->cqid); + writel(val, cq->gts); + cq->cidx_inc = 0; + return 0; +} + +static inline void t4_swcq_produce(struct t4_cq *cq) +{ + cq->sw_in_use++; + if (++cq->sw_pidx == cq->size) + cq->sw_pidx = 0; +} + +static inline void t4_swcq_consume(struct t4_cq *cq) +{ + cq->sw_in_use--; + if (++cq->sw_cidx == cq->size) + cq->sw_cidx = 0; +} + +static inline void t4_hwcq_consume(struct t4_cq *cq) +{ + cq->bits_type_ts = cq->queue[cq->cidx].bits_type_ts; + if (++cq->cidx_inc == (cq->size >> 4)) { + u32 val; + + val = SEINTARM(0) | CIDXINC(cq->cidx_inc) | TIMERREG(7) | + INGRESSQID(cq->cqid); + writel(val, cq->gts); + cq->cidx_inc = 0; + } + if (++cq->cidx == cq->size) { + cq->cidx = 0; + cq->gen ^= 1; + } +} + +static inline int t4_valid_cqe(struct t4_cq *cq, struct t4_cqe *cqe) +{ + return (CQE_GENBIT(cqe) == cq->gen); +} + +static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) +{ + int ret; + u16 prev_cidx; + + if (cq->cidx == 0) + prev_cidx = cq->size - 1; + else + prev_cidx = cq->cidx - 1; + + if (cq->queue[prev_cidx].bits_type_ts != cq->bits_type_ts) { + ret = -EOVERFLOW; + cq->error = 1; + printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid); + } else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) { + *cqe = &cq->queue[cq->cidx]; + ret = 0; + } else + ret = -ENODATA; + return ret; +} + +static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq) +{ + if (cq->sw_in_use) + return &cq->sw_queue[cq->sw_cidx]; + return NULL; +} + +static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe) +{ + int ret = 0; + + if (cq->error) + ret = -ENODATA; + else if (cq->sw_in_use) + *cqe = &cq->sw_queue[cq->sw_cidx]; + else + ret = t4_next_hw_cqe(cq, cqe); + return ret; +} + +static inline int t4_cq_in_error(struct t4_cq *cq) +{ + return ((struct t4_status_page *)&cq->queue[cq->size])->qp_err; +} + +static inline void t4_set_cq_in_error(struct t4_cq *cq) +{ + ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1; +} +#endif diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h new file mode 100644 index 00000000..dc193c29 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -0,0 +1,839 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _T4FW_RI_API_H_ +#define _T4FW_RI_API_H_ + +#include "t4fw_api.h" + +enum fw_ri_wr_opcode { + FW_RI_RDMA_WRITE = 0x0, /* IETF RDMAP v1.0 ... */ + FW_RI_READ_REQ = 0x1, + FW_RI_READ_RESP = 0x2, + FW_RI_SEND = 0x3, + FW_RI_SEND_WITH_INV = 0x4, + FW_RI_SEND_WITH_SE = 0x5, + FW_RI_SEND_WITH_SE_INV = 0x6, + FW_RI_TERMINATE = 0x7, + FW_RI_RDMA_INIT = 0x8, /* CHELSIO RI specific ... */ + FW_RI_BIND_MW = 0x9, + FW_RI_FAST_REGISTER = 0xa, + FW_RI_LOCAL_INV = 0xb, + FW_RI_QP_MODIFY = 0xc, + FW_RI_BYPASS = 0xd, + FW_RI_RECEIVE = 0xe, + + FW_RI_SGE_EC_CR_RETURN = 0xf +}; + +enum fw_ri_wr_flags { + FW_RI_COMPLETION_FLAG = 0x01, + FW_RI_NOTIFICATION_FLAG = 0x02, + FW_RI_SOLICITED_EVENT_FLAG = 0x04, + FW_RI_READ_FENCE_FLAG = 0x08, + FW_RI_LOCAL_FENCE_FLAG = 0x10, + FW_RI_RDMA_READ_INVALIDATE = 0x20 +}; + +enum fw_ri_mpa_attrs { + FW_RI_MPA_RX_MARKER_ENABLE = 0x01, + FW_RI_MPA_TX_MARKER_ENABLE = 0x02, + FW_RI_MPA_CRC_ENABLE = 0x04, + FW_RI_MPA_IETF_ENABLE = 0x08 +}; + +enum fw_ri_qp_caps { + FW_RI_QP_RDMA_READ_ENABLE = 0x01, + FW_RI_QP_RDMA_WRITE_ENABLE = 0x02, + FW_RI_QP_BIND_ENABLE = 0x04, + FW_RI_QP_FAST_REGISTER_ENABLE = 0x08, + FW_RI_QP_STAG0_ENABLE = 0x10 +}; + +enum fw_ri_addr_type { + FW_RI_ZERO_BASED_TO = 0x00, + FW_RI_VA_BASED_TO = 0x01 +}; + +enum fw_ri_mem_perms { + FW_RI_MEM_ACCESS_REM_WRITE = 0x01, + FW_RI_MEM_ACCESS_REM_READ = 0x02, + FW_RI_MEM_ACCESS_REM = 0x03, + FW_RI_MEM_ACCESS_LOCAL_WRITE = 0x04, + FW_RI_MEM_ACCESS_LOCAL_READ = 0x08, + FW_RI_MEM_ACCESS_LOCAL = 0x0C +}; + +enum fw_ri_stag_type { + FW_RI_STAG_NSMR = 0x00, + FW_RI_STAG_SMR = 0x01, + FW_RI_STAG_MW = 0x02, + FW_RI_STAG_MW_RELAXED = 0x03 +}; + +enum fw_ri_data_op { + FW_RI_DATA_IMMD = 0x81, + FW_RI_DATA_DSGL = 0x82, + FW_RI_DATA_ISGL = 0x83 +}; + +enum fw_ri_sgl_depth { + FW_RI_SGL_DEPTH_MAX_SQ = 16, + FW_RI_SGL_DEPTH_MAX_RQ = 4 +}; + +struct fw_ri_dsge_pair { + __be32 len[2]; + __be64 addr[2]; +}; + +struct fw_ri_dsgl { + __u8 op; + __u8 r1; + __be16 nsge; + __be32 len0; + __be64 addr0; +#ifndef C99_NOT_SUPPORTED + struct fw_ri_dsge_pair sge[0]; +#endif +}; + +struct fw_ri_sge { + __be32 stag; + __be32 len; + __be64 to; +}; + +struct fw_ri_isgl { + __u8 op; + __u8 r1; + __be16 nsge; + __be32 r2; +#ifndef C99_NOT_SUPPORTED + struct fw_ri_sge sge[0]; +#endif +}; + +struct fw_ri_immd { + __u8 op; + __u8 r1; + __be16 r2; + __be32 immdlen; +#ifndef C99_NOT_SUPPORTED + __u8 data[0]; +#endif +}; + +struct fw_ri_tpte { + __be32 valid_to_pdid; + __be32 locread_to_qpid; + __be32 nosnoop_pbladdr; + __be32 len_lo; + __be32 va_hi; + __be32 va_lo_fbo; + __be32 dca_mwbcnt_pstag; + __be32 len_hi; +}; + +#define S_FW_RI_TPTE_VALID 31 +#define M_FW_RI_TPTE_VALID 0x1 +#define V_FW_RI_TPTE_VALID(x) ((x) << S_FW_RI_TPTE_VALID) +#define G_FW_RI_TPTE_VALID(x) \ + (((x) >> S_FW_RI_TPTE_VALID) & M_FW_RI_TPTE_VALID) +#define F_FW_RI_TPTE_VALID V_FW_RI_TPTE_VALID(1U) + +#define S_FW_RI_TPTE_STAGKEY 23 +#define M_FW_RI_TPTE_STAGKEY 0xff +#define V_FW_RI_TPTE_STAGKEY(x) ((x) << S_FW_RI_TPTE_STAGKEY) +#define G_FW_RI_TPTE_STAGKEY(x) \ + (((x) >> S_FW_RI_TPTE_STAGKEY) & M_FW_RI_TPTE_STAGKEY) + +#define S_FW_RI_TPTE_STAGSTATE 22 +#define M_FW_RI_TPTE_STAGSTATE 0x1 +#define V_FW_RI_TPTE_STAGSTATE(x) ((x) << S_FW_RI_TPTE_STAGSTATE) +#define G_FW_RI_TPTE_STAGSTATE(x) \ + (((x) >> S_FW_RI_TPTE_STAGSTATE) & M_FW_RI_TPTE_STAGSTATE) +#define F_FW_RI_TPTE_STAGSTATE V_FW_RI_TPTE_STAGSTATE(1U) + +#define S_FW_RI_TPTE_STAGTYPE 20 +#define M_FW_RI_TPTE_STAGTYPE 0x3 +#define V_FW_RI_TPTE_STAGTYPE(x) ((x) << S_FW_RI_TPTE_STAGTYPE) +#define G_FW_RI_TPTE_STAGTYPE(x) \ + (((x) >> S_FW_RI_TPTE_STAGTYPE) & M_FW_RI_TPTE_STAGTYPE) + +#define S_FW_RI_TPTE_PDID 0 +#define M_FW_RI_TPTE_PDID 0xfffff +#define V_FW_RI_TPTE_PDID(x) ((x) << S_FW_RI_TPTE_PDID) +#define G_FW_RI_TPTE_PDID(x) \ + (((x) >> S_FW_RI_TPTE_PDID) & M_FW_RI_TPTE_PDID) + +#define S_FW_RI_TPTE_PERM 28 +#define M_FW_RI_TPTE_PERM 0xf +#define V_FW_RI_TPTE_PERM(x) ((x) << S_FW_RI_TPTE_PERM) +#define G_FW_RI_TPTE_PERM(x) \ + (((x) >> S_FW_RI_TPTE_PERM) & M_FW_RI_TPTE_PERM) + +#define S_FW_RI_TPTE_REMINVDIS 27 +#define M_FW_RI_TPTE_REMINVDIS 0x1 +#define V_FW_RI_TPTE_REMINVDIS(x) ((x) << S_FW_RI_TPTE_REMINVDIS) +#define G_FW_RI_TPTE_REMINVDIS(x) \ + (((x) >> S_FW_RI_TPTE_REMINVDIS) & M_FW_RI_TPTE_REMINVDIS) +#define F_FW_RI_TPTE_REMINVDIS V_FW_RI_TPTE_REMINVDIS(1U) + +#define S_FW_RI_TPTE_ADDRTYPE 26 +#define M_FW_RI_TPTE_ADDRTYPE 1 +#define V_FW_RI_TPTE_ADDRTYPE(x) ((x) << S_FW_RI_TPTE_ADDRTYPE) +#define G_FW_RI_TPTE_ADDRTYPE(x) \ + (((x) >> S_FW_RI_TPTE_ADDRTYPE) & M_FW_RI_TPTE_ADDRTYPE) +#define F_FW_RI_TPTE_ADDRTYPE V_FW_RI_TPTE_ADDRTYPE(1U) + +#define S_FW_RI_TPTE_MWBINDEN 25 +#define M_FW_RI_TPTE_MWBINDEN 0x1 +#define V_FW_RI_TPTE_MWBINDEN(x) ((x) << S_FW_RI_TPTE_MWBINDEN) +#define G_FW_RI_TPTE_MWBINDEN(x) \ + (((x) >> S_FW_RI_TPTE_MWBINDEN) & M_FW_RI_TPTE_MWBINDEN) +#define F_FW_RI_TPTE_MWBINDEN V_FW_RI_TPTE_MWBINDEN(1U) + +#define S_FW_RI_TPTE_PS 20 +#define M_FW_RI_TPTE_PS 0x1f +#define V_FW_RI_TPTE_PS(x) ((x) << S_FW_RI_TPTE_PS) +#define G_FW_RI_TPTE_PS(x) \ + (((x) >> S_FW_RI_TPTE_PS) & M_FW_RI_TPTE_PS) + +#define S_FW_RI_TPTE_QPID 0 +#define M_FW_RI_TPTE_QPID 0xfffff +#define V_FW_RI_TPTE_QPID(x) ((x) << S_FW_RI_TPTE_QPID) +#define G_FW_RI_TPTE_QPID(x) \ + (((x) >> S_FW_RI_TPTE_QPID) & M_FW_RI_TPTE_QPID) + +#define S_FW_RI_TPTE_NOSNOOP 30 +#define M_FW_RI_TPTE_NOSNOOP 0x1 +#define V_FW_RI_TPTE_NOSNOOP(x) ((x) << S_FW_RI_TPTE_NOSNOOP) +#define G_FW_RI_TPTE_NOSNOOP(x) \ + (((x) >> S_FW_RI_TPTE_NOSNOOP) & M_FW_RI_TPTE_NOSNOOP) +#define F_FW_RI_TPTE_NOSNOOP V_FW_RI_TPTE_NOSNOOP(1U) + +#define S_FW_RI_TPTE_PBLADDR 0 +#define M_FW_RI_TPTE_PBLADDR 0x1fffffff +#define V_FW_RI_TPTE_PBLADDR(x) ((x) << S_FW_RI_TPTE_PBLADDR) +#define G_FW_RI_TPTE_PBLADDR(x) \ + (((x) >> S_FW_RI_TPTE_PBLADDR) & M_FW_RI_TPTE_PBLADDR) + +#define S_FW_RI_TPTE_DCA 24 +#define M_FW_RI_TPTE_DCA 0x1f +#define V_FW_RI_TPTE_DCA(x) ((x) << S_FW_RI_TPTE_DCA) +#define G_FW_RI_TPTE_DCA(x) \ + (((x) >> S_FW_RI_TPTE_DCA) & M_FW_RI_TPTE_DCA) + +#define S_FW_RI_TPTE_MWBCNT_PSTAG 0 +#define M_FW_RI_TPTE_MWBCNT_PSTAG 0xffffff +#define V_FW_RI_TPTE_MWBCNT_PSTAT(x) \ + ((x) << S_FW_RI_TPTE_MWBCNT_PSTAG) +#define G_FW_RI_TPTE_MWBCNT_PSTAG(x) \ + (((x) >> S_FW_RI_TPTE_MWBCNT_PSTAG) & M_FW_RI_TPTE_MWBCNT_PSTAG) + +enum fw_ri_res_type { + FW_RI_RES_TYPE_SQ, + FW_RI_RES_TYPE_RQ, + FW_RI_RES_TYPE_CQ, +}; + +enum fw_ri_res_op { + FW_RI_RES_OP_WRITE, + FW_RI_RES_OP_RESET, +}; + +struct fw_ri_res { + union fw_ri_restype { + struct fw_ri_res_sqrq { + __u8 restype; + __u8 op; + __be16 r3; + __be32 eqid; + __be32 r4[2]; + __be32 fetchszm_to_iqid; + __be32 dcaen_to_eqsize; + __be64 eqaddr; + } sqrq; + struct fw_ri_res_cq { + __u8 restype; + __u8 op; + __be16 r3; + __be32 iqid; + __be32 r4[2]; + __be32 iqandst_to_iqandstindex; + __be16 iqdroprss_to_iqesize; + __be16 iqsize; + __be64 iqaddr; + __be32 iqns_iqro; + __be32 r6_lo; + __be64 r7; + } cq; + } u; +}; + +struct fw_ri_res_wr { + __be32 op_nres; + __be32 len16_pkd; + __u64 cookie; +#ifndef C99_NOT_SUPPORTED + struct fw_ri_res res[0]; +#endif +}; + +#define S_FW_RI_RES_WR_NRES 0 +#define M_FW_RI_RES_WR_NRES 0xff +#define V_FW_RI_RES_WR_NRES(x) ((x) << S_FW_RI_RES_WR_NRES) +#define G_FW_RI_RES_WR_NRES(x) \ + (((x) >> S_FW_RI_RES_WR_NRES) & M_FW_RI_RES_WR_NRES) + +#define S_FW_RI_RES_WR_FETCHSZM 26 +#define M_FW_RI_RES_WR_FETCHSZM 0x1 +#define V_FW_RI_RES_WR_FETCHSZM(x) ((x) << S_FW_RI_RES_WR_FETCHSZM) +#define G_FW_RI_RES_WR_FETCHSZM(x) \ + (((x) >> S_FW_RI_RES_WR_FETCHSZM) & M_FW_RI_RES_WR_FETCHSZM) +#define F_FW_RI_RES_WR_FETCHSZM V_FW_RI_RES_WR_FETCHSZM(1U) + +#define S_FW_RI_RES_WR_STATUSPGNS 25 +#define M_FW_RI_RES_WR_STATUSPGNS 0x1 +#define V_FW_RI_RES_WR_STATUSPGNS(x) ((x) << S_FW_RI_RES_WR_STATUSPGNS) +#define G_FW_RI_RES_WR_STATUSPGNS(x) \ + (((x) >> S_FW_RI_RES_WR_STATUSPGNS) & M_FW_RI_RES_WR_STATUSPGNS) +#define F_FW_RI_RES_WR_STATUSPGNS V_FW_RI_RES_WR_STATUSPGNS(1U) + +#define S_FW_RI_RES_WR_STATUSPGRO 24 +#define M_FW_RI_RES_WR_STATUSPGRO 0x1 +#define V_FW_RI_RES_WR_STATUSPGRO(x) ((x) << S_FW_RI_RES_WR_STATUSPGRO) +#define G_FW_RI_RES_WR_STATUSPGRO(x) \ + (((x) >> S_FW_RI_RES_WR_STATUSPGRO) & M_FW_RI_RES_WR_STATUSPGRO) +#define F_FW_RI_RES_WR_STATUSPGRO V_FW_RI_RES_WR_STATUSPGRO(1U) + +#define S_FW_RI_RES_WR_FETCHNS 23 +#define M_FW_RI_RES_WR_FETCHNS 0x1 +#define V_FW_RI_RES_WR_FETCHNS(x) ((x) << S_FW_RI_RES_WR_FETCHNS) +#define G_FW_RI_RES_WR_FETCHNS(x) \ + (((x) >> S_FW_RI_RES_WR_FETCHNS) & M_FW_RI_RES_WR_FETCHNS) +#define F_FW_RI_RES_WR_FETCHNS V_FW_RI_RES_WR_FETCHNS(1U) + +#define S_FW_RI_RES_WR_FETCHRO 22 +#define M_FW_RI_RES_WR_FETCHRO 0x1 +#define V_FW_RI_RES_WR_FETCHRO(x) ((x) << S_FW_RI_RES_WR_FETCHRO) +#define G_FW_RI_RES_WR_FETCHRO(x) \ + (((x) >> S_FW_RI_RES_WR_FETCHRO) & M_FW_RI_RES_WR_FETCHRO) +#define F_FW_RI_RES_WR_FETCHRO V_FW_RI_RES_WR_FETCHRO(1U) + +#define S_FW_RI_RES_WR_HOSTFCMODE 20 +#define M_FW_RI_RES_WR_HOSTFCMODE 0x3 +#define V_FW_RI_RES_WR_HOSTFCMODE(x) ((x) << S_FW_RI_RES_WR_HOSTFCMODE) +#define G_FW_RI_RES_WR_HOSTFCMODE(x) \ + (((x) >> S_FW_RI_RES_WR_HOSTFCMODE) & M_FW_RI_RES_WR_HOSTFCMODE) + +#define S_FW_RI_RES_WR_CPRIO 19 +#define M_FW_RI_RES_WR_CPRIO 0x1 +#define V_FW_RI_RES_WR_CPRIO(x) ((x) << S_FW_RI_RES_WR_CPRIO) +#define G_FW_RI_RES_WR_CPRIO(x) \ + (((x) >> S_FW_RI_RES_WR_CPRIO) & M_FW_RI_RES_WR_CPRIO) +#define F_FW_RI_RES_WR_CPRIO V_FW_RI_RES_WR_CPRIO(1U) + +#define S_FW_RI_RES_WR_ONCHIP 18 +#define M_FW_RI_RES_WR_ONCHIP 0x1 +#define V_FW_RI_RES_WR_ONCHIP(x) ((x) << S_FW_RI_RES_WR_ONCHIP) +#define G_FW_RI_RES_WR_ONCHIP(x) \ + (((x) >> S_FW_RI_RES_WR_ONCHIP) & M_FW_RI_RES_WR_ONCHIP) +#define F_FW_RI_RES_WR_ONCHIP V_FW_RI_RES_WR_ONCHIP(1U) + +#define S_FW_RI_RES_WR_PCIECHN 16 +#define M_FW_RI_RES_WR_PCIECHN 0x3 +#define V_FW_RI_RES_WR_PCIECHN(x) ((x) << S_FW_RI_RES_WR_PCIECHN) +#define G_FW_RI_RES_WR_PCIECHN(x) \ + (((x) >> S_FW_RI_RES_WR_PCIECHN) & M_FW_RI_RES_WR_PCIECHN) + +#define S_FW_RI_RES_WR_IQID 0 +#define M_FW_RI_RES_WR_IQID 0xffff +#define V_FW_RI_RES_WR_IQID(x) ((x) << S_FW_RI_RES_WR_IQID) +#define G_FW_RI_RES_WR_IQID(x) \ + (((x) >> S_FW_RI_RES_WR_IQID) & M_FW_RI_RES_WR_IQID) + +#define S_FW_RI_RES_WR_DCAEN 31 +#define M_FW_RI_RES_WR_DCAEN 0x1 +#define V_FW_RI_RES_WR_DCAEN(x) ((x) << S_FW_RI_RES_WR_DCAEN) +#define G_FW_RI_RES_WR_DCAEN(x) \ + (((x) >> S_FW_RI_RES_WR_DCAEN) & M_FW_RI_RES_WR_DCAEN) +#define F_FW_RI_RES_WR_DCAEN V_FW_RI_RES_WR_DCAEN(1U) + +#define S_FW_RI_RES_WR_DCACPU 26 +#define M_FW_RI_RES_WR_DCACPU 0x1f +#define V_FW_RI_RES_WR_DCACPU(x) ((x) << S_FW_RI_RES_WR_DCACPU) +#define G_FW_RI_RES_WR_DCACPU(x) \ + (((x) >> S_FW_RI_RES_WR_DCACPU) & M_FW_RI_RES_WR_DCACPU) + +#define S_FW_RI_RES_WR_FBMIN 23 +#define M_FW_RI_RES_WR_FBMIN 0x7 +#define V_FW_RI_RES_WR_FBMIN(x) ((x) << S_FW_RI_RES_WR_FBMIN) +#define G_FW_RI_RES_WR_FBMIN(x) \ + (((x) >> S_FW_RI_RES_WR_FBMIN) & M_FW_RI_RES_WR_FBMIN) + +#define S_FW_RI_RES_WR_FBMAX 20 +#define M_FW_RI_RES_WR_FBMAX 0x7 +#define V_FW_RI_RES_WR_FBMAX(x) ((x) << S_FW_RI_RES_WR_FBMAX) +#define G_FW_RI_RES_WR_FBMAX(x) \ + (((x) >> S_FW_RI_RES_WR_FBMAX) & M_FW_RI_RES_WR_FBMAX) + +#define S_FW_RI_RES_WR_CIDXFTHRESHO 19 +#define M_FW_RI_RES_WR_CIDXFTHRESHO 0x1 +#define V_FW_RI_RES_WR_CIDXFTHRESHO(x) ((x) << S_FW_RI_RES_WR_CIDXFTHRESHO) +#define G_FW_RI_RES_WR_CIDXFTHRESHO(x) \ + (((x) >> S_FW_RI_RES_WR_CIDXFTHRESHO) & M_FW_RI_RES_WR_CIDXFTHRESHO) +#define F_FW_RI_RES_WR_CIDXFTHRESHO V_FW_RI_RES_WR_CIDXFTHRESHO(1U) + +#define S_FW_RI_RES_WR_CIDXFTHRESH 16 +#define M_FW_RI_RES_WR_CIDXFTHRESH 0x7 +#define V_FW_RI_RES_WR_CIDXFTHRESH(x) ((x) << S_FW_RI_RES_WR_CIDXFTHRESH) +#define G_FW_RI_RES_WR_CIDXFTHRESH(x) \ + (((x) >> S_FW_RI_RES_WR_CIDXFTHRESH) & M_FW_RI_RES_WR_CIDXFTHRESH) + +#define S_FW_RI_RES_WR_EQSIZE 0 +#define M_FW_RI_RES_WR_EQSIZE 0xffff +#define V_FW_RI_RES_WR_EQSIZE(x) ((x) << S_FW_RI_RES_WR_EQSIZE) +#define G_FW_RI_RES_WR_EQSIZE(x) \ + (((x) >> S_FW_RI_RES_WR_EQSIZE) & M_FW_RI_RES_WR_EQSIZE) + +#define S_FW_RI_RES_WR_IQANDST 15 +#define M_FW_RI_RES_WR_IQANDST 0x1 +#define V_FW_RI_RES_WR_IQANDST(x) ((x) << S_FW_RI_RES_WR_IQANDST) +#define G_FW_RI_RES_WR_IQANDST(x) \ + (((x) >> S_FW_RI_RES_WR_IQANDST) & M_FW_RI_RES_WR_IQANDST) +#define F_FW_RI_RES_WR_IQANDST V_FW_RI_RES_WR_IQANDST(1U) + +#define S_FW_RI_RES_WR_IQANUS 14 +#define M_FW_RI_RES_WR_IQANUS 0x1 +#define V_FW_RI_RES_WR_IQANUS(x) ((x) << S_FW_RI_RES_WR_IQANUS) +#define G_FW_RI_RES_WR_IQANUS(x) \ + (((x) >> S_FW_RI_RES_WR_IQANUS) & M_FW_RI_RES_WR_IQANUS) +#define F_FW_RI_RES_WR_IQANUS V_FW_RI_RES_WR_IQANUS(1U) + +#define S_FW_RI_RES_WR_IQANUD 12 +#define M_FW_RI_RES_WR_IQANUD 0x3 +#define V_FW_RI_RES_WR_IQANUD(x) ((x) << S_FW_RI_RES_WR_IQANUD) +#define G_FW_RI_RES_WR_IQANUD(x) \ + (((x) >> S_FW_RI_RES_WR_IQANUD) & M_FW_RI_RES_WR_IQANUD) + +#define S_FW_RI_RES_WR_IQANDSTINDEX 0 +#define M_FW_RI_RES_WR_IQANDSTINDEX 0xfff +#define V_FW_RI_RES_WR_IQANDSTINDEX(x) ((x) << S_FW_RI_RES_WR_IQANDSTINDEX) +#define G_FW_RI_RES_WR_IQANDSTINDEX(x) \ + (((x) >> S_FW_RI_RES_WR_IQANDSTINDEX) & M_FW_RI_RES_WR_IQANDSTINDEX) + +#define S_FW_RI_RES_WR_IQDROPRSS 15 +#define M_FW_RI_RES_WR_IQDROPRSS 0x1 +#define V_FW_RI_RES_WR_IQDROPRSS(x) ((x) << S_FW_RI_RES_WR_IQDROPRSS) +#define G_FW_RI_RES_WR_IQDROPRSS(x) \ + (((x) >> S_FW_RI_RES_WR_IQDROPRSS) & M_FW_RI_RES_WR_IQDROPRSS) +#define F_FW_RI_RES_WR_IQDROPRSS V_FW_RI_RES_WR_IQDROPRSS(1U) + +#define S_FW_RI_RES_WR_IQGTSMODE 14 +#define M_FW_RI_RES_WR_IQGTSMODE 0x1 +#define V_FW_RI_RES_WR_IQGTSMODE(x) ((x) << S_FW_RI_RES_WR_IQGTSMODE) +#define G_FW_RI_RES_WR_IQGTSMODE(x) \ + (((x) >> S_FW_RI_RES_WR_IQGTSMODE) & M_FW_RI_RES_WR_IQGTSMODE) +#define F_FW_RI_RES_WR_IQGTSMODE V_FW_RI_RES_WR_IQGTSMODE(1U) + +#define S_FW_RI_RES_WR_IQPCIECH 12 +#define M_FW_RI_RES_WR_IQPCIECH 0x3 +#define V_FW_RI_RES_WR_IQPCIECH(x) ((x) << S_FW_RI_RES_WR_IQPCIECH) +#define G_FW_RI_RES_WR_IQPCIECH(x) \ + (((x) >> S_FW_RI_RES_WR_IQPCIECH) & M_FW_RI_RES_WR_IQPCIECH) + +#define S_FW_RI_RES_WR_IQDCAEN 11 +#define M_FW_RI_RES_WR_IQDCAEN 0x1 +#define V_FW_RI_RES_WR_IQDCAEN(x) ((x) << S_FW_RI_RES_WR_IQDCAEN) +#define G_FW_RI_RES_WR_IQDCAEN(x) \ + (((x) >> S_FW_RI_RES_WR_IQDCAEN) & M_FW_RI_RES_WR_IQDCAEN) +#define F_FW_RI_RES_WR_IQDCAEN V_FW_RI_RES_WR_IQDCAEN(1U) + +#define S_FW_RI_RES_WR_IQDCACPU 6 +#define M_FW_RI_RES_WR_IQDCACPU 0x1f +#define V_FW_RI_RES_WR_IQDCACPU(x) ((x) << S_FW_RI_RES_WR_IQDCACPU) +#define G_FW_RI_RES_WR_IQDCACPU(x) \ + (((x) >> S_FW_RI_RES_WR_IQDCACPU) & M_FW_RI_RES_WR_IQDCACPU) + +#define S_FW_RI_RES_WR_IQINTCNTTHRESH 4 +#define M_FW_RI_RES_WR_IQINTCNTTHRESH 0x3 +#define V_FW_RI_RES_WR_IQINTCNTTHRESH(x) \ + ((x) << S_FW_RI_RES_WR_IQINTCNTTHRESH) +#define G_FW_RI_RES_WR_IQINTCNTTHRESH(x) \ + (((x) >> S_FW_RI_RES_WR_IQINTCNTTHRESH) & M_FW_RI_RES_WR_IQINTCNTTHRESH) + +#define S_FW_RI_RES_WR_IQO 3 +#define M_FW_RI_RES_WR_IQO 0x1 +#define V_FW_RI_RES_WR_IQO(x) ((x) << S_FW_RI_RES_WR_IQO) +#define G_FW_RI_RES_WR_IQO(x) \ + (((x) >> S_FW_RI_RES_WR_IQO) & M_FW_RI_RES_WR_IQO) +#define F_FW_RI_RES_WR_IQO V_FW_RI_RES_WR_IQO(1U) + +#define S_FW_RI_RES_WR_IQCPRIO 2 +#define M_FW_RI_RES_WR_IQCPRIO 0x1 +#define V_FW_RI_RES_WR_IQCPRIO(x) ((x) << S_FW_RI_RES_WR_IQCPRIO) +#define G_FW_RI_RES_WR_IQCPRIO(x) \ + (((x) >> S_FW_RI_RES_WR_IQCPRIO) & M_FW_RI_RES_WR_IQCPRIO) +#define F_FW_RI_RES_WR_IQCPRIO V_FW_RI_RES_WR_IQCPRIO(1U) + +#define S_FW_RI_RES_WR_IQESIZE 0 +#define M_FW_RI_RES_WR_IQESIZE 0x3 +#define V_FW_RI_RES_WR_IQESIZE(x) ((x) << S_FW_RI_RES_WR_IQESIZE) +#define G_FW_RI_RES_WR_IQESIZE(x) \ + (((x) >> S_FW_RI_RES_WR_IQESIZE) & M_FW_RI_RES_WR_IQESIZE) + +#define S_FW_RI_RES_WR_IQNS 31 +#define M_FW_RI_RES_WR_IQNS 0x1 +#define V_FW_RI_RES_WR_IQNS(x) ((x) << S_FW_RI_RES_WR_IQNS) +#define G_FW_RI_RES_WR_IQNS(x) \ + (((x) >> S_FW_RI_RES_WR_IQNS) & M_FW_RI_RES_WR_IQNS) +#define F_FW_RI_RES_WR_IQNS V_FW_RI_RES_WR_IQNS(1U) + +#define S_FW_RI_RES_WR_IQRO 30 +#define M_FW_RI_RES_WR_IQRO 0x1 +#define V_FW_RI_RES_WR_IQRO(x) ((x) << S_FW_RI_RES_WR_IQRO) +#define G_FW_RI_RES_WR_IQRO(x) \ + (((x) >> S_FW_RI_RES_WR_IQRO) & M_FW_RI_RES_WR_IQRO) +#define F_FW_RI_RES_WR_IQRO V_FW_RI_RES_WR_IQRO(1U) + +struct fw_ri_rdma_write_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __be64 r2; + __be32 plen; + __be32 stag_sink; + __be64 to_sink; +#ifndef C99_NOT_SUPPORTED + union { + struct fw_ri_immd immd_src[0]; + struct fw_ri_isgl isgl_src[0]; + } u; +#endif +}; + +struct fw_ri_send_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __be32 sendop_pkd; + __be32 stag_inv; + __be32 plen; + __be32 r3; + __be64 r4; +#ifndef C99_NOT_SUPPORTED + union { + struct fw_ri_immd immd_src[0]; + struct fw_ri_isgl isgl_src[0]; + } u; +#endif +}; + +#define S_FW_RI_SEND_WR_SENDOP 0 +#define M_FW_RI_SEND_WR_SENDOP 0xf +#define V_FW_RI_SEND_WR_SENDOP(x) ((x) << S_FW_RI_SEND_WR_SENDOP) +#define G_FW_RI_SEND_WR_SENDOP(x) \ + (((x) >> S_FW_RI_SEND_WR_SENDOP) & M_FW_RI_SEND_WR_SENDOP) + +struct fw_ri_rdma_read_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __be64 r2; + __be32 stag_sink; + __be32 to_sink_hi; + __be32 to_sink_lo; + __be32 plen; + __be32 stag_src; + __be32 to_src_hi; + __be32 to_src_lo; + __be32 r5; +}; + +struct fw_ri_recv_wr { + __u8 opcode; + __u8 r1; + __u16 wrid; + __u8 r2[3]; + __u8 len16; + struct fw_ri_isgl isgl; +}; + +struct fw_ri_bind_mw_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __u8 qpbinde_to_dcacpu; + __u8 pgsz_shift; + __u8 addr_type; + __u8 mem_perms; + __be32 stag_mr; + __be32 stag_mw; + __be32 r3; + __be64 len_mw; + __be64 va_fbo; + __be64 r4; +}; + +#define S_FW_RI_BIND_MW_WR_QPBINDE 6 +#define M_FW_RI_BIND_MW_WR_QPBINDE 0x1 +#define V_FW_RI_BIND_MW_WR_QPBINDE(x) ((x) << S_FW_RI_BIND_MW_WR_QPBINDE) +#define G_FW_RI_BIND_MW_WR_QPBINDE(x) \ + (((x) >> S_FW_RI_BIND_MW_WR_QPBINDE) & M_FW_RI_BIND_MW_WR_QPBINDE) +#define F_FW_RI_BIND_MW_WR_QPBINDE V_FW_RI_BIND_MW_WR_QPBINDE(1U) + +#define S_FW_RI_BIND_MW_WR_NS 5 +#define M_FW_RI_BIND_MW_WR_NS 0x1 +#define V_FW_RI_BIND_MW_WR_NS(x) ((x) << S_FW_RI_BIND_MW_WR_NS) +#define G_FW_RI_BIND_MW_WR_NS(x) \ + (((x) >> S_FW_RI_BIND_MW_WR_NS) & M_FW_RI_BIND_MW_WR_NS) +#define F_FW_RI_BIND_MW_WR_NS V_FW_RI_BIND_MW_WR_NS(1U) + +#define S_FW_RI_BIND_MW_WR_DCACPU 0 +#define M_FW_RI_BIND_MW_WR_DCACPU 0x1f +#define V_FW_RI_BIND_MW_WR_DCACPU(x) ((x) << S_FW_RI_BIND_MW_WR_DCACPU) +#define G_FW_RI_BIND_MW_WR_DCACPU(x) \ + (((x) >> S_FW_RI_BIND_MW_WR_DCACPU) & M_FW_RI_BIND_MW_WR_DCACPU) + +struct fw_ri_fr_nsmr_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __u8 qpbinde_to_dcacpu; + __u8 pgsz_shift; + __u8 addr_type; + __u8 mem_perms; + __be32 stag; + __be32 len_hi; + __be32 len_lo; + __be32 va_hi; + __be32 va_lo_fbo; +}; + +#define S_FW_RI_FR_NSMR_WR_QPBINDE 6 +#define M_FW_RI_FR_NSMR_WR_QPBINDE 0x1 +#define V_FW_RI_FR_NSMR_WR_QPBINDE(x) ((x) << S_FW_RI_FR_NSMR_WR_QPBINDE) +#define G_FW_RI_FR_NSMR_WR_QPBINDE(x) \ + (((x) >> S_FW_RI_FR_NSMR_WR_QPBINDE) & M_FW_RI_FR_NSMR_WR_QPBINDE) +#define F_FW_RI_FR_NSMR_WR_QPBINDE V_FW_RI_FR_NSMR_WR_QPBINDE(1U) + +#define S_FW_RI_FR_NSMR_WR_NS 5 +#define M_FW_RI_FR_NSMR_WR_NS 0x1 +#define V_FW_RI_FR_NSMR_WR_NS(x) ((x) << S_FW_RI_FR_NSMR_WR_NS) +#define G_FW_RI_FR_NSMR_WR_NS(x) \ + (((x) >> S_FW_RI_FR_NSMR_WR_NS) & M_FW_RI_FR_NSMR_WR_NS) +#define F_FW_RI_FR_NSMR_WR_NS V_FW_RI_FR_NSMR_WR_NS(1U) + +#define S_FW_RI_FR_NSMR_WR_DCACPU 0 +#define M_FW_RI_FR_NSMR_WR_DCACPU 0x1f +#define V_FW_RI_FR_NSMR_WR_DCACPU(x) ((x) << S_FW_RI_FR_NSMR_WR_DCACPU) +#define G_FW_RI_FR_NSMR_WR_DCACPU(x) \ + (((x) >> S_FW_RI_FR_NSMR_WR_DCACPU) & M_FW_RI_FR_NSMR_WR_DCACPU) + +struct fw_ri_inv_lstag_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __be32 r2; + __be32 stag_inv; +}; + +enum fw_ri_type { + FW_RI_TYPE_INIT, + FW_RI_TYPE_FINI, + FW_RI_TYPE_TERMINATE +}; + +enum fw_ri_init_p2ptype { + FW_RI_INIT_P2PTYPE_RDMA_WRITE = FW_RI_RDMA_WRITE, + FW_RI_INIT_P2PTYPE_READ_REQ = FW_RI_READ_REQ, + FW_RI_INIT_P2PTYPE_SEND = FW_RI_SEND, + FW_RI_INIT_P2PTYPE_SEND_WITH_INV = FW_RI_SEND_WITH_INV, + FW_RI_INIT_P2PTYPE_SEND_WITH_SE = FW_RI_SEND_WITH_SE, + FW_RI_INIT_P2PTYPE_SEND_WITH_SE_INV = FW_RI_SEND_WITH_SE_INV, + FW_RI_INIT_P2PTYPE_DISABLED = 0xf, +}; + +struct fw_ri_wr { + __be32 op_compl; + __be32 flowid_len16; + __u64 cookie; + union fw_ri { + struct fw_ri_init { + __u8 type; + __u8 mpareqbit_p2ptype; + __u8 r4[2]; + __u8 mpa_attrs; + __u8 qp_caps; + __be16 nrqe; + __be32 pdid; + __be32 qpid; + __be32 sq_eqid; + __be32 rq_eqid; + __be32 scqid; + __be32 rcqid; + __be32 ord_max; + __be32 ird_max; + __be32 iss; + __be32 irs; + __be32 hwrqsize; + __be32 hwrqaddr; + __be64 r5; + union fw_ri_init_p2p { + struct fw_ri_rdma_write_wr write; + struct fw_ri_rdma_read_wr read; + struct fw_ri_send_wr send; + } u; + } init; + struct fw_ri_fini { + __u8 type; + __u8 r3[7]; + __be64 r4; + } fini; + struct fw_ri_terminate { + __u8 type; + __u8 r3[3]; + __be32 immdlen; + __u8 termmsg[40]; + } terminate; + } u; +}; + +#define S_FW_RI_WR_MPAREQBIT 7 +#define M_FW_RI_WR_MPAREQBIT 0x1 +#define V_FW_RI_WR_MPAREQBIT(x) ((x) << S_FW_RI_WR_MPAREQBIT) +#define G_FW_RI_WR_MPAREQBIT(x) \ + (((x) >> S_FW_RI_WR_MPAREQBIT) & M_FW_RI_WR_MPAREQBIT) +#define F_FW_RI_WR_MPAREQBIT V_FW_RI_WR_MPAREQBIT(1U) + +#define S_FW_RI_WR_P2PTYPE 0 +#define M_FW_RI_WR_P2PTYPE 0xf +#define V_FW_RI_WR_P2PTYPE(x) ((x) << S_FW_RI_WR_P2PTYPE) +#define G_FW_RI_WR_P2PTYPE(x) \ + (((x) >> S_FW_RI_WR_P2PTYPE) & M_FW_RI_WR_P2PTYPE) + +struct tcp_options { + __be16 mss; + __u8 wsf; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8:4; + __u8 unknown:1; + __u8:1; + __u8 sack:1; + __u8 tstamp:1; +#else + __u8 tstamp:1; + __u8 sack:1; + __u8:1; + __u8 unknown:1; + __u8:4; +#endif +}; + +struct cpl_pass_accept_req { + union opcode_tid ot; + __be16 rsvd; + __be16 len; + __be32 hdr_len; + __be16 vlan; + __be16 l2info; + __be32 tos_stid; + struct tcp_options tcpopt; +}; + +/* cpl_pass_accept_req.hdr_len fields */ +#define S_SYN_RX_CHAN 0 +#define M_SYN_RX_CHAN 0xF +#define V_SYN_RX_CHAN(x) ((x) << S_SYN_RX_CHAN) +#define G_SYN_RX_CHAN(x) (((x) >> S_SYN_RX_CHAN) & M_SYN_RX_CHAN) + +#define S_TCP_HDR_LEN 10 +#define M_TCP_HDR_LEN 0x3F +#define V_TCP_HDR_LEN(x) ((x) << S_TCP_HDR_LEN) +#define G_TCP_HDR_LEN(x) (((x) >> S_TCP_HDR_LEN) & M_TCP_HDR_LEN) + +#define S_IP_HDR_LEN 16 +#define M_IP_HDR_LEN 0x3FF +#define V_IP_HDR_LEN(x) ((x) << S_IP_HDR_LEN) +#define G_IP_HDR_LEN(x) (((x) >> S_IP_HDR_LEN) & M_IP_HDR_LEN) + +#define S_ETH_HDR_LEN 26 +#define M_ETH_HDR_LEN 0x1F +#define V_ETH_HDR_LEN(x) ((x) << S_ETH_HDR_LEN) +#define G_ETH_HDR_LEN(x) (((x) >> S_ETH_HDR_LEN) & M_ETH_HDR_LEN) + +/* cpl_pass_accept_req.l2info fields */ +#define S_SYN_MAC_IDX 0 +#define M_SYN_MAC_IDX 0x1FF +#define V_SYN_MAC_IDX(x) ((x) << S_SYN_MAC_IDX) +#define G_SYN_MAC_IDX(x) (((x) >> S_SYN_MAC_IDX) & M_SYN_MAC_IDX) + +#define S_SYN_XACT_MATCH 9 +#define V_SYN_XACT_MATCH(x) ((x) << S_SYN_XACT_MATCH) +#define F_SYN_XACT_MATCH V_SYN_XACT_MATCH(1U) + +#define S_SYN_INTF 12 +#define M_SYN_INTF 0xF +#define V_SYN_INTF(x) ((x) << S_SYN_INTF) +#define G_SYN_INTF(x) (((x) >> S_SYN_INTF) & M_SYN_INTF) + +struct ulptx_idata { + __be32 cmd_more; + __be32 len; +}; + +#define S_ULPTX_NSGE 0 +#define M_ULPTX_NSGE 0xFFFF +#define V_ULPTX_NSGE(x) ((x) << S_ULPTX_NSGE) + +#define S_RX_DACK_MODE 29 +#define M_RX_DACK_MODE 0x3 +#define V_RX_DACK_MODE(x) ((x) << S_RX_DACK_MODE) +#define G_RX_DACK_MODE(x) (((x) >> S_RX_DACK_MODE) & M_RX_DACK_MODE) + +#define S_RX_DACK_CHANGE 31 +#define V_RX_DACK_CHANGE(x) ((x) << S_RX_DACK_CHANGE) +#define F_RX_DACK_CHANGE V_RX_DACK_CHANGE(1U) + +#endif /* _T4FW_RI_API_H_ */ diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h new file mode 100644 index 00000000..e6669d54 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/user.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __C4IW_USER_H__ +#define __C4IW_USER_H__ + +#define C4IW_UVERBS_ABI_VERSION 1 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ +struct c4iw_create_cq_resp { + __u64 key; + __u64 gts_key; + __u64 memsize; + __u32 cqid; + __u32 size; + __u32 qid_mask; +}; + + +enum { + C4IW_QPF_ONCHIP = (1<<0) +}; + +struct c4iw_create_qp_resp { + __u64 ma_sync_key; + __u64 sq_key; + __u64 rq_key; + __u64 sq_db_gts_key; + __u64 rq_db_gts_key; + __u64 sq_memsize; + __u64 rq_memsize; + __u32 sqid; + __u32 rqid; + __u32 sq_size; + __u32 rq_size; + __u32 qid_mask; + __u32 flags; +}; +#endif diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig new file mode 100644 index 00000000..59f807d8 --- /dev/null +++ b/drivers/infiniband/hw/ehca/Kconfig @@ -0,0 +1,9 @@ +config INFINIBAND_EHCA + tristate "eHCA support" + depends on IBMEBUS + ---help--- + This driver supports the IBM pSeries eHCA InfiniBand adapter. + + To compile the driver as a module, choose M here. The module + will be called ib_ehca. + diff --git a/drivers/infiniband/hw/ehca/Makefile b/drivers/infiniband/hw/ehca/Makefile new file mode 100644 index 00000000..74d284e4 --- /dev/null +++ b/drivers/infiniband/hw/ehca/Makefile @@ -0,0 +1,16 @@ +# Authors: Heiko J Schick +# Christoph Raisch +# Joachim Fenkes +# +# Copyright (c) 2005 IBM Corporation +# +# All rights reserved. +# +# This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD. + +obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o + +ib_ehca-objs = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \ + ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \ + ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o + diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c new file mode 100644 index 00000000..46592631 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -0,0 +1,277 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * address vector functions + * + * Authors: Hoang-Nam Nguyen + * Khadija Souissi + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_tools.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +static struct kmem_cache *av_cache; + +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd) +{ + int path = ib_rate_to_mult(path_rate); + int link, ret; + struct ib_port_attr pa; + + if (path_rate == IB_RATE_PORT_CURRENT) { + *ipd = 0; + return 0; + } + + if (unlikely(path < 0)) { + ehca_err(&shca->ib_device, "Invalid static rate! path_rate=%x", + path_rate); + return -EINVAL; + } + + ret = ehca_query_port(&shca->ib_device, port, &pa); + if (unlikely(ret < 0)) { + ehca_err(&shca->ib_device, "Failed to query port ret=%i", ret); + return ret; + } + + link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; + + if (path >= link) + /* no need to throttle if path faster than link */ + *ipd = 0; + else + /* IPD = round((link / path) - 1) */ + *ipd = ((link + (path >> 1)) / path) - 1; + + return 0; +} + +struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + int ret; + struct ehca_av *av; + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + + av = kmem_cache_alloc(av_cache, GFP_KERNEL); + if (!av) { + ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p", + pd, ah_attr); + return ERR_PTR(-ENOMEM); + } + + av->av.sl = ah_attr->sl; + av->av.dlid = ah_attr->dlid; + av->av.slid_path_bits = ah_attr->src_path_bits; + + if (ehca_static_rate < 0) { + u32 ipd; + if (ehca_calc_ipd(shca, ah_attr->port_num, + ah_attr->static_rate, &ipd)) { + ret = -EINVAL; + goto create_ah_exit1; + } + av->av.ipd = ipd; + } else + av->av.ipd = ehca_static_rate; + + av->av.lnh = ah_attr->ah_flags; + av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK, + ah_attr->grh.traffic_class); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, + ah_attr->grh.flow_label); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, + ah_attr->grh.hop_limit); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B); + /* set sgid in grh.word_1 */ + if (ah_attr->ah_flags & IB_AH_GRH) { + int rc; + struct ib_port_attr port_attr; + union ib_gid gid; + memset(&port_attr, 0, sizeof(port_attr)); + rc = ehca_query_port(pd->device, ah_attr->port_num, + &port_attr); + if (rc) { /* invalid port number */ + ret = -EINVAL; + ehca_err(pd->device, "Invalid port number " + "ehca_query_port() returned %x " + "pd=%p ah_attr=%p", rc, pd, ah_attr); + goto create_ah_exit1; + } + memset(&gid, 0, sizeof(gid)); + rc = ehca_query_gid(pd->device, + ah_attr->port_num, + ah_attr->grh.sgid_index, &gid); + if (rc) { + ret = -EINVAL; + ehca_err(pd->device, "Failed to retrieve sgid " + "ehca_query_gid() returned %x " + "pd=%p ah_attr=%p", rc, pd, ah_attr); + goto create_ah_exit1; + } + memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); + } + av->av.pmtu = shca->max_mtu; + + /* dgid comes in grh.word_3 */ + memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, + sizeof(ah_attr->grh.dgid)); + + return &av->ib_ah; + +create_ah_exit1: + kmem_cache_free(av_cache, av); + + return ERR_PTR(ret); +} + +int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + struct ehca_av *av; + struct ehca_ud_av new_ehca_av; + struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca, + ib_device); + + memset(&new_ehca_av, 0, sizeof(new_ehca_av)); + new_ehca_av.sl = ah_attr->sl; + new_ehca_av.dlid = ah_attr->dlid; + new_ehca_av.slid_path_bits = ah_attr->src_path_bits; + new_ehca_av.ipd = ah_attr->static_rate; + new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK, + (ah_attr->ah_flags & IB_AH_GRH) > 0); + new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK, + ah_attr->grh.traffic_class); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, + ah_attr->grh.flow_label); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, + ah_attr->grh.hop_limit); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b); + + /* set sgid in grh.word_1 */ + if (ah_attr->ah_flags & IB_AH_GRH) { + int rc; + struct ib_port_attr port_attr; + union ib_gid gid; + memset(&port_attr, 0, sizeof(port_attr)); + rc = ehca_query_port(ah->device, ah_attr->port_num, + &port_attr); + if (rc) { /* invalid port number */ + ehca_err(ah->device, "Invalid port number " + "ehca_query_port() returned %x " + "ah=%p ah_attr=%p port_num=%x", + rc, ah, ah_attr, ah_attr->port_num); + return -EINVAL; + } + memset(&gid, 0, sizeof(gid)); + rc = ehca_query_gid(ah->device, + ah_attr->port_num, + ah_attr->grh.sgid_index, &gid); + if (rc) { + ehca_err(ah->device, "Failed to retrieve sgid " + "ehca_query_gid() returned %x " + "ah=%p ah_attr=%p port_num=%x " + "sgid_index=%x", + rc, ah, ah_attr, ah_attr->port_num, + ah_attr->grh.sgid_index); + return -EINVAL; + } + memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); + } + + new_ehca_av.pmtu = shca->max_mtu; + + memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, + sizeof(ah_attr->grh.dgid)); + + av = container_of(ah, struct ehca_av, ib_ah); + av->av = new_ehca_av; + + return 0; +} + +int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah); + + memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3, + sizeof(ah_attr->grh.dgid)); + ah_attr->sl = av->av.sl; + + ah_attr->dlid = av->av.dlid; + + ah_attr->src_path_bits = av->av.slid_path_bits; + ah_attr->static_rate = av->av.ipd; + ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh); + ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK, + av->av.grh.word_0); + ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK, + av->av.grh.word_0); + ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK, + av->av.grh.word_0); + + return 0; +} + +int ehca_destroy_ah(struct ib_ah *ah) +{ + kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah)); + + return 0; +} + +int ehca_init_av_cache(void) +{ + av_cache = kmem_cache_create("ehca_cache_av", + sizeof(struct ehca_av), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!av_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_av_cache(void) +{ + if (av_cache) + kmem_cache_destroy(av_cache); +} diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h new file mode 100644 index 00000000..f08f6eaf --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -0,0 +1,482 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Struct definition for eHCA internal structures + * + * Authors: Heiko J Schick + * Christoph Raisch + * Joachim Fenkes + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_CLASSES_H__ +#define __EHCA_CLASSES_H__ + +struct ehca_module; +struct ehca_qp; +struct ehca_cq; +struct ehca_eq; +struct ehca_mr; +struct ehca_mw; +struct ehca_pd; +struct ehca_av; + +#include +#include + +#include +#include + +#ifdef CONFIG_PPC64 +#include "ehca_classes_pSeries.h" +#endif +#include "ipz_pt_fn.h" +#include "ehca_qes.h" +#include "ehca_irq.h" + +#define EHCA_EQE_CACHE_SIZE 20 +#define EHCA_MAX_NUM_QUEUES 0xffff + +struct ehca_eqe_cache_entry { + struct ehca_eqe *eqe; + struct ehca_cq *cq; +}; + +struct ehca_eq { + u32 length; + struct ipz_queue ipz_queue; + struct ipz_eq_handle ipz_eq_handle; + struct work_struct work; + struct h_galpas galpas; + int is_initialized; + struct ehca_pfeq pf; + spinlock_t spinlock; + struct tasklet_struct interrupt_task; + u32 ist; + spinlock_t irq_spinlock; + struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE]; +}; + +struct ehca_sma_attr { + u16 lid, lmc, sm_sl, sm_lid; + u16 pkey_tbl_len, pkeys[16]; +}; + +struct ehca_sport { + struct ib_cq *ibcq_aqp1; + struct ib_qp *ibqp_sqp[2]; + /* lock to serialze modify_qp() calls for sqp in normal + * and irq path (when event PORT_ACTIVE is received first time) + */ + spinlock_t mod_sqp_lock; + enum ib_port_state port_state; + struct ehca_sma_attr saved_attr; + u32 pma_qp_nr; +}; + +#define HCA_CAP_MR_PGSIZE_4K 0x80000000 +#define HCA_CAP_MR_PGSIZE_64K 0x40000000 +#define HCA_CAP_MR_PGSIZE_1M 0x20000000 +#define HCA_CAP_MR_PGSIZE_16M 0x10000000 + +struct ehca_shca { + struct ib_device ib_device; + struct platform_device *ofdev; + u8 num_ports; + int hw_level; + struct list_head shca_list; + struct ipz_adapter_handle ipz_hca_handle; + struct ehca_sport sport[2]; + struct ehca_eq eq; + struct ehca_eq neq; + struct ehca_mr *maxmr; + struct ehca_pd *pd; + struct h_galpas galpas; + struct mutex modify_mutex; + u64 hca_cap; + /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ + u32 hca_cap_mr_pgsize; + int max_mtu; + int max_num_qps; + int max_num_cqs; + atomic_t num_cqs; + atomic_t num_qps; +}; + +struct ehca_pd { + struct ib_pd ib_pd; + struct ipz_pd fw_pd; + /* small queue mgmt */ + struct mutex lock; + struct list_head free[2]; + struct list_head full[2]; +}; + +enum ehca_ext_qp_type { + EQPT_NORMAL = 0, + EQPT_LLQP = 1, + EQPT_SRQBASE = 2, + EQPT_SRQ = 3, +}; + +/* struct to cache modify_qp()'s parms for GSI/SMI qp */ +struct ehca_mod_qp_parm { + int mask; + struct ib_qp_attr attr; +}; + +#define EHCA_MOD_QP_PARM_MAX 4 + +#define QMAP_IDX_MASK 0xFFFFULL + +/* struct for tracking if cqes have been reported to the application */ +struct ehca_qmap_entry { + u16 app_wr_id; + u8 reported; + u8 cqe_req; +}; + +struct ehca_queue_map { + struct ehca_qmap_entry *map; + unsigned int entries; + unsigned int tail; + unsigned int left_to_poll; + unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */ +}; + +/* function to calculate the next index for the qmap */ +static inline unsigned int next_index(unsigned int cur_index, unsigned int limit) +{ + unsigned int temp = cur_index + 1; + return (temp == limit) ? 0 : temp; +} + +struct ehca_qp { + union { + struct ib_qp ib_qp; + struct ib_srq ib_srq; + }; + u32 qp_type; + enum ehca_ext_qp_type ext_type; + enum ib_qp_state state; + struct ipz_queue ipz_squeue; + struct ehca_queue_map sq_map; + struct ipz_queue ipz_rqueue; + struct ehca_queue_map rq_map; + struct h_galpas galpas; + u32 qkey; + u32 real_qp_num; + u32 token; + spinlock_t spinlock_s; + spinlock_t spinlock_r; + u32 sq_max_inline_data_size; + struct ipz_qp_handle ipz_qp_handle; + struct ehca_pfqp pf; + struct ib_qp_init_attr init_attr; + struct ehca_cq *send_cq; + struct ehca_cq *recv_cq; + unsigned int sqerr_purgeflag; + struct hlist_node list_entries; + /* array to cache modify_qp()'s parms for GSI/SMI qp */ + struct ehca_mod_qp_parm *mod_qp_parm; + int mod_qp_parm_idx; + /* mmap counter for resources mapped into user space */ + u32 mm_count_squeue; + u32 mm_count_rqueue; + u32 mm_count_galpa; + /* unsolicited ack circumvention */ + int unsol_ack_circ; + int mtu_shift; + u32 message_count; + u32 packet_count; + atomic_t nr_events; /* events seen */ + wait_queue_head_t wait_completion; + int mig_armed; + struct list_head sq_err_node; + struct list_head rq_err_node; +}; + +#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) +#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ) +#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE) + +/* must be power of 2 */ +#define QP_HASHTAB_LEN 8 + +struct ehca_cq { + struct ib_cq ib_cq; + struct ipz_queue ipz_queue; + struct h_galpas galpas; + spinlock_t spinlock; + u32 cq_number; + u32 token; + u32 nr_of_entries; + struct ipz_cq_handle ipz_cq_handle; + struct ehca_pfcq pf; + spinlock_t cb_lock; + struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; + struct list_head entry; + u32 nr_callbacks; /* #events assigned to cpu by scaling code */ + atomic_t nr_events; /* #events seen */ + wait_queue_head_t wait_completion; + spinlock_t task_lock; + /* mmap counter for resources mapped into user space */ + u32 mm_count_queue; + u32 mm_count_galpa; + struct list_head sqp_err_list; + struct list_head rqp_err_list; +}; + +enum ehca_mr_flag { + EHCA_MR_FLAG_FMR = 0x80000000, /* FMR, created with ehca_alloc_fmr */ + EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR */ +}; + +struct ehca_mr { + union { + struct ib_mr ib_mr; /* must always be first in ehca_mr */ + struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ + } ib; + struct ib_umem *umem; + spinlock_t mrlock; + + enum ehca_mr_flag flags; + u32 num_kpages; /* number of kernel pages */ + u32 num_hwpages; /* number of hw pages to form MR */ + u64 hwpage_size; /* hw page size used for this MR */ + int acl; /* ACL (stored here for usage in reregister) */ + u64 *start; /* virtual start address (stored here for */ + /* usage in reregister) */ + u64 size; /* size (stored here for usage in reregister) */ + u32 fmr_page_size; /* page size for FMR */ + u32 fmr_max_pages; /* max pages for FMR */ + u32 fmr_max_maps; /* max outstanding maps for FMR */ + u32 fmr_map_cnt; /* map counter for FMR */ + /* fw specific data */ + struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ + struct h_galpas galpas; +}; + +struct ehca_mw { + struct ib_mw ib_mw; /* gen2 mw, must always be first in ehca_mw */ + spinlock_t mwlock; + + u8 never_bound; /* indication MW was never bound */ + struct ipz_mrmw_handle ipz_mw_handle; /* MW handle for h-calls */ + struct h_galpas galpas; +}; + +enum ehca_mr_pgi_type { + EHCA_MR_PGI_PHYS = 1, /* type of ehca_reg_phys_mr, + * ehca_rereg_phys_mr, + * ehca_reg_internal_maxmr */ + EHCA_MR_PGI_USER = 2, /* type of ehca_reg_user_mr */ + EHCA_MR_PGI_FMR = 3 /* type of ehca_map_phys_fmr */ +}; + +struct ehca_mr_pginfo { + enum ehca_mr_pgi_type type; + u64 num_kpages; + u64 kpage_cnt; + u64 hwpage_size; /* hw page size used for this MR */ + u64 num_hwpages; /* number of hw pages */ + u64 hwpage_cnt; /* counter for hw pages */ + u64 next_hwpage; /* next hw page in buffer/chunk/listelem */ + + union { + struct { /* type EHCA_MR_PGI_PHYS section */ + int num_phys_buf; + struct ib_phys_buf *phys_buf_array; + u64 next_buf; + } phy; + struct { /* type EHCA_MR_PGI_USER section */ + struct ib_umem *region; + struct ib_umem_chunk *next_chunk; + u64 next_nmap; + } usr; + struct { /* type EHCA_MR_PGI_FMR section */ + u64 fmr_pgsize; + u64 *page_list; + u64 next_listelem; + } fmr; + } u; +}; + +/* output parameters for MR/FMR hipz calls */ +struct ehca_mr_hipzout_parms { + struct ipz_mrmw_handle handle; + u32 lkey; + u32 rkey; + u64 len; + u64 vaddr; + u32 acl; +}; + +/* output parameters for MW hipz calls */ +struct ehca_mw_hipzout_parms { + struct ipz_mrmw_handle handle; + u32 rkey; +}; + +struct ehca_av { + struct ib_ah ib_ah; + struct ehca_ud_av av; +}; + +struct ehca_ucontext { + struct ib_ucontext ib_ucontext; +}; + +int ehca_init_pd_cache(void); +void ehca_cleanup_pd_cache(void); +int ehca_init_cq_cache(void); +void ehca_cleanup_cq_cache(void); +int ehca_init_qp_cache(void); +void ehca_cleanup_qp_cache(void); +int ehca_init_av_cache(void); +void ehca_cleanup_av_cache(void); +int ehca_init_mrmw_cache(void); +void ehca_cleanup_mrmw_cache(void); +int ehca_init_small_qp_cache(void); +void ehca_cleanup_small_qp_cache(void); + +extern rwlock_t ehca_qp_idr_lock; +extern rwlock_t ehca_cq_idr_lock; +extern struct idr ehca_qp_idr; +extern struct idr ehca_cq_idr; +extern spinlock_t shca_list_lock; + +extern int ehca_static_rate; +extern int ehca_port_act_time; +extern bool ehca_use_hp_mr; +extern bool ehca_scaling_code; +extern int ehca_lock_hcalls; +extern int ehca_nr_ports; +extern int ehca_max_cq; +extern int ehca_max_qp; + +struct ipzu_queue_resp { + u32 qe_size; /* queue entry size */ + u32 act_nr_of_sg; + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; + u32 offset; /* save offset within a page for small_qp */ +}; + +struct ehca_create_cq_resp { + u32 cq_number; + u32 token; + struct ipzu_queue_resp ipz_queue; + u32 fw_handle_ofs; + u32 dummy; +}; + +struct ehca_create_qp_resp { + u32 qp_num; + u32 token; + u32 qp_type; + u32 ext_type; + u32 qkey; + /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ + u32 real_qp_num; + u32 fw_handle_ofs; + u32 dummy; + struct ipzu_queue_resp ipz_squeue; + struct ipzu_queue_resp ipz_rqueue; +}; + +struct ehca_alloc_cq_parms { + u32 nr_cqe; + u32 act_nr_of_entries; + u32 act_pages; + struct ipz_eq_handle eq_handle; +}; + +enum ehca_service_type { + ST_RC = 0, + ST_UC = 1, + ST_RD = 2, + ST_UD = 3, +}; + +enum ehca_ll_comp_flags { + LLQP_SEND_COMP = 0x20, + LLQP_RECV_COMP = 0x40, + LLQP_COMP_MASK = 0x60, +}; + +struct ehca_alloc_queue_parms { + /* input parameters */ + int max_wr; + int max_sge; + int page_size; + int is_small; + + /* output parameters */ + u16 act_nr_wqes; + u8 act_nr_sges; + u32 queue_size; /* bytes for small queues, pages otherwise */ +}; + +struct ehca_alloc_qp_parms { + struct ehca_alloc_queue_parms squeue; + struct ehca_alloc_queue_parms rqueue; + + /* input parameters */ + enum ehca_service_type servicetype; + int qp_storage; + int sigtype; + enum ehca_ext_qp_type ext_type; + enum ehca_ll_comp_flags ll_comp_flags; + int ud_av_l_key_ctl; + + u32 token; + struct ipz_eq_handle eq_handle; + struct ipz_pd pd; + struct ipz_cq_handle send_cq_handle, recv_cq_handle; + + u32 srq_qpn, srq_token, srq_limit; + + /* output parameters */ + u32 real_qp_num; + struct ipz_qp_handle qp_handle; + struct h_galpas galpas; +}; + +int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); +int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); + +#endif diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h new file mode 100644 index 00000000..689c3578 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h @@ -0,0 +1,208 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * pSeries interface definitions + * + * Authors: Waleri Fomin + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_CLASSES_PSERIES_H__ +#define __EHCA_CLASSES_PSERIES_H__ + +#include "hcp_phyp.h" +#include "ipz_pt_fn.h" + + +struct ehca_pfqp { + struct ipz_qpt sqpt; + struct ipz_qpt rqpt; +}; + +struct ehca_pfcq { + struct ipz_qpt qpt; + u32 cqnr; +}; + +struct ehca_pfeq { + struct ipz_qpt qpt; + struct h_galpa galpa; + u32 eqnr; +}; + +struct ipz_adapter_handle { + u64 handle; +}; + +struct ipz_cq_handle { + u64 handle; +}; + +struct ipz_eq_handle { + u64 handle; +}; + +struct ipz_qp_handle { + u64 handle; +}; +struct ipz_mrmw_handle { + u64 handle; +}; + +struct ipz_pd { + u32 value; +}; + +struct hcp_modify_qp_control_block { + u32 qkey; /* 00 */ + u32 rdd; /* reliable datagram domain */ + u32 send_psn; /* 02 */ + u32 receive_psn; /* 03 */ + u32 prim_phys_port; /* 04 */ + u32 alt_phys_port; /* 05 */ + u32 prim_p_key_idx; /* 06 */ + u32 alt_p_key_idx; /* 07 */ + u32 rdma_atomic_ctrl; /* 08 */ + u32 qp_state; /* 09 */ + u32 reserved_10; /* 10 */ + u32 rdma_nr_atomic_resp_res; /* 11 */ + u32 path_migration_state; /* 12 */ + u32 rdma_atomic_outst_dest_qp; /* 13 */ + u32 dest_qp_nr; /* 14 */ + u32 min_rnr_nak_timer_field; /* 15 */ + u32 service_level; /* 16 */ + u32 send_grh_flag; /* 17 */ + u32 retry_count; /* 18 */ + u32 timeout; /* 19 */ + u32 path_mtu; /* 20 */ + u32 max_static_rate; /* 21 */ + u32 dlid; /* 22 */ + u32 rnr_retry_count; /* 23 */ + u32 source_path_bits; /* 24 */ + u32 traffic_class; /* 25 */ + u32 hop_limit; /* 26 */ + u32 source_gid_idx; /* 27 */ + u32 flow_label; /* 28 */ + u32 reserved_29; /* 29 */ + union { /* 30 */ + u64 dw[2]; + u8 byte[16]; + } dest_gid; + u32 service_level_al; /* 34 */ + u32 send_grh_flag_al; /* 35 */ + u32 retry_count_al; /* 36 */ + u32 timeout_al; /* 37 */ + u32 max_static_rate_al; /* 38 */ + u32 dlid_al; /* 39 */ + u32 rnr_retry_count_al; /* 40 */ + u32 source_path_bits_al; /* 41 */ + u32 traffic_class_al; /* 42 */ + u32 hop_limit_al; /* 43 */ + u32 source_gid_idx_al; /* 44 */ + u32 flow_label_al; /* 45 */ + u32 reserved_46; /* 46 */ + u32 reserved_47; /* 47 */ + union { /* 48 */ + u64 dw[2]; + u8 byte[16]; + } dest_gid_al; + u32 max_nr_outst_send_wr; /* 52 */ + u32 max_nr_outst_recv_wr; /* 53 */ + u32 disable_ete_credit_check; /* 54 */ + u32 qp_number; /* 55 */ + u64 send_queue_handle; /* 56 */ + u64 recv_queue_handle; /* 58 */ + u32 actual_nr_sges_in_sq_wqe; /* 60 */ + u32 actual_nr_sges_in_rq_wqe; /* 61 */ + u32 qp_enable; /* 62 */ + u32 curr_srq_limit; /* 63 */ + u64 qp_aff_asyn_ev_log_reg; /* 64 */ + u64 shared_rq_hndl; /* 66 */ + u64 trigg_doorbell_qp_hndl; /* 68 */ + u32 reserved_70_127[58]; /* 70 */ +}; + +#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0) +#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2) +#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3) +#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4) +#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5) +#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6) +#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) +#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) +#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) +#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) +#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) +#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) +#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14) +#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15) +#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16) +#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17) +#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) +#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) +#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) +#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) +#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) +#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) +#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) +#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) +#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) +#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) +#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) +#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) +#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) +#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32) +#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) +#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) +#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) +#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) +#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) +#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) +#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) +#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) +#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) +#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) +#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) +#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) +#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) +#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) +#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) +#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) +#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) +#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) + +#endif /* __EHCA_CLASSES_PSERIES_H__ */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c new file mode 100644 index 00000000..d9b0ebcb --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -0,0 +1,404 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Completion queue handling + * + * Authors: Waleri Fomin + * Khadija Souissi + * Reinhard Ernst + * Heiko J Schick + * Hoang-Nam Nguyen + * + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_iverbs.h" +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "hcp_if.h" + +static struct kmem_cache *cq_cache; + +int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp) +{ + unsigned int qp_num = qp->real_qp_num; + unsigned int key = qp_num & (QP_HASHTAB_LEN-1); + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]); + spin_unlock_irqrestore(&cq->spinlock, flags); + + ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x", + cq->cq_number, qp_num); + + return 0; +} + +int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) +{ + int ret = -EINVAL; + unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); + struct hlist_node *iter; + struct ehca_qp *qp; + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + hlist_for_each(iter, &cq->qp_hashtab[key]) { + qp = hlist_entry(iter, struct ehca_qp, list_entries); + if (qp->real_qp_num == real_qp_num) { + hlist_del(iter); + ehca_dbg(cq->ib_cq.device, + "removed qp from cq .cq_num=%x real_qp_num=%x", + cq->cq_number, real_qp_num); + ret = 0; + break; + } + } + spin_unlock_irqrestore(&cq->spinlock, flags); + if (ret) + ehca_err(cq->ib_cq.device, + "qp not found cq_num=%x real_qp_num=%x", + cq->cq_number, real_qp_num); + + return ret; +} + +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) +{ + struct ehca_qp *ret = NULL; + unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); + struct hlist_node *iter; + struct ehca_qp *qp; + hlist_for_each(iter, &cq->qp_hashtab[key]) { + qp = hlist_entry(iter, struct ehca_qp, list_entries); + if (qp->real_qp_num == real_qp_num) { + ret = qp; + break; + } + } + return ret; +} + +struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + static const u32 additional_cqe = 20; + struct ib_cq *cq; + struct ehca_cq *my_cq; + struct ehca_shca *shca = + container_of(device, struct ehca_shca, ib_device); + struct ipz_adapter_handle adapter_handle; + struct ehca_alloc_cq_parms param; /* h_call's out parameters */ + struct h_galpa gal; + void *vpage; + u32 counter; + u64 rpage, cqx_fec, h_ret; + int ipz_rc, ret, i; + unsigned long flags; + + if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) + return ERR_PTR(-EINVAL); + + if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) { + ehca_err(device, "Unable to create CQ, max number of %i " + "CQs reached.", shca->max_num_cqs); + ehca_err(device, "To increase the maximum number of CQs " + "use the number_of_cqs module parameter.\n"); + return ERR_PTR(-ENOSPC); + } + + my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL); + if (!my_cq) { + ehca_err(device, "Out of memory for ehca_cq struct device=%p", + device); + atomic_dec(&shca->num_cqs); + return ERR_PTR(-ENOMEM); + } + + memset(¶m, 0, sizeof(struct ehca_alloc_cq_parms)); + + spin_lock_init(&my_cq->spinlock); + spin_lock_init(&my_cq->cb_lock); + spin_lock_init(&my_cq->task_lock); + atomic_set(&my_cq->nr_events, 0); + init_waitqueue_head(&my_cq->wait_completion); + + cq = &my_cq->ib_cq; + + adapter_handle = shca->ipz_hca_handle; + param.eq_handle = shca->eq.ipz_eq_handle; + + do { + if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) { + cq = ERR_PTR(-ENOMEM); + ehca_err(device, "Can't reserve idr nr. device=%p", + device); + goto create_cq_exit1; + } + + write_lock_irqsave(&ehca_cq_idr_lock, flags); + ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + } while (ret == -EAGAIN); + + if (ret) { + cq = ERR_PTR(-ENOMEM); + ehca_err(device, "Can't allocate new idr entry. device=%p", + device); + goto create_cq_exit1; + } + + if (my_cq->token > 0x1FFFFFF) { + cq = ERR_PTR(-ENOMEM); + ehca_err(device, "Invalid number of cq. device=%p", device); + goto create_cq_exit2; + } + + /* + * CQs maximum depth is 4GB-64, but we need additional 20 as buffer + * for receiving errors CQEs. + */ + param.nr_cqe = cqe + additional_cqe; + h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, ¶m); + + if (h_ret != H_SUCCESS) { + ehca_err(device, "hipz_h_alloc_resource_cq() failed " + "h_ret=%lli device=%p", h_ret, device); + cq = ERR_PTR(ehca2ib_return_code(h_ret)); + goto create_cq_exit2; + } + + ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages, + EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0); + if (!ipz_rc) { + ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p", + ipz_rc, device); + cq = ERR_PTR(-EINVAL); + goto create_cq_exit3; + } + + for (counter = 0; counter < param.act_pages; counter++) { + vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); + if (!vpage) { + ehca_err(device, "ipz_qpageit_get_inc() " + "returns NULL device=%p", device); + cq = ERR_PTR(-EAGAIN); + goto create_cq_exit4; + } + rpage = virt_to_abs(vpage); + + h_ret = hipz_h_register_rpage_cq(adapter_handle, + my_cq->ipz_cq_handle, + &my_cq->pf, + 0, + 0, + rpage, + 1, + my_cq->galpas. + kernel); + + if (h_ret < H_SUCCESS) { + ehca_err(device, "hipz_h_register_rpage_cq() failed " + "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i " + "act_pages=%i", my_cq, my_cq->cq_number, + h_ret, counter, param.act_pages); + cq = ERR_PTR(-EINVAL); + goto create_cq_exit4; + } + + if (counter == (param.act_pages - 1)) { + vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); + if ((h_ret != H_SUCCESS) || vpage) { + ehca_err(device, "Registration of pages not " + "complete ehca_cq=%p cq_num=%x " + "h_ret=%lli", my_cq, my_cq->cq_number, + h_ret); + cq = ERR_PTR(-EAGAIN); + goto create_cq_exit4; + } + } else { + if (h_ret != H_PAGE_REGISTERED) { + ehca_err(device, "Registration of page failed " + "ehca_cq=%p cq_num=%x h_ret=%lli " + "counter=%i act_pages=%i", + my_cq, my_cq->cq_number, + h_ret, counter, param.act_pages); + cq = ERR_PTR(-ENOMEM); + goto create_cq_exit4; + } + } + } + + ipz_qeit_reset(&my_cq->ipz_queue); + + gal = my_cq->galpas.kernel; + cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec)); + ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx", + my_cq, my_cq->cq_number, cqx_fec); + + my_cq->ib_cq.cqe = my_cq->nr_of_entries = + param.act_nr_of_entries - additional_cqe; + my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff; + + for (i = 0; i < QP_HASHTAB_LEN; i++) + INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); + + INIT_LIST_HEAD(&my_cq->sqp_err_list); + INIT_LIST_HEAD(&my_cq->rqp_err_list); + + if (context) { + struct ipz_queue *ipz_queue = &my_cq->ipz_queue; + struct ehca_create_cq_resp resp; + memset(&resp, 0, sizeof(resp)); + resp.cq_number = my_cq->cq_number; + resp.token = my_cq->token; + resp.ipz_queue.qe_size = ipz_queue->qe_size; + resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg; + resp.ipz_queue.queue_length = ipz_queue->queue_length; + resp.ipz_queue.pagesize = ipz_queue->pagesize; + resp.ipz_queue.toggle_state = ipz_queue->toggle_state; + resp.fw_handle_ofs = (u32) + (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1)); + if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { + ehca_err(device, "Copy to udata failed."); + goto create_cq_exit4; + } + } + + return cq; + +create_cq_exit4: + ipz_queue_dtor(NULL, &my_cq->ipz_queue); + +create_cq_exit3: + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); + if (h_ret != H_SUCCESS) + ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p " + "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret); + +create_cq_exit2: + write_lock_irqsave(&ehca_cq_idr_lock, flags); + idr_remove(&ehca_cq_idr, my_cq->token); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + +create_cq_exit1: + kmem_cache_free(cq_cache, my_cq); + + atomic_dec(&shca->num_cqs); + return cq; +} + +int ehca_destroy_cq(struct ib_cq *cq) +{ + u64 h_ret; + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + int cq_num = my_cq->cq_number; + struct ib_device *device = cq->device; + struct ehca_shca *shca = container_of(device, struct ehca_shca, + ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + unsigned long flags; + + if (cq->uobject) { + if (my_cq->mm_count_galpa || my_cq->mm_count_queue) { + ehca_err(device, "Resources still referenced in " + "user space cq_num=%x", my_cq->cq_number); + return -EINVAL; + } + } + + /* + * remove the CQ from the idr first to make sure + * no more interrupt tasklets will touch this CQ + */ + write_lock_irqsave(&ehca_cq_idr_lock, flags); + idr_remove(&ehca_cq_idr, my_cq->token); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + /* now wait until all pending events have completed */ + wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events)); + + /* nobody's using our CQ any longer -- we can destroy it */ + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); + if (h_ret == H_R_STATE) { + /* cq in err: read err data and destroy it forcibly */ + ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err " + "state. Try to delete it forcibly.", + my_cq, cq_num, my_cq->ipz_cq_handle.handle); + ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle); + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); + if (h_ret == H_SUCCESS) + ehca_dbg(device, "cq_num=%x deleted successfully.", + cq_num); + } + if (h_ret != H_SUCCESS) { + ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli " + "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); + return ehca2ib_return_code(h_ret); + } + ipz_queue_dtor(NULL, &my_cq->ipz_queue); + kmem_cache_free(cq_cache, my_cq); + + atomic_dec(&shca->num_cqs); + return 0; +} + +int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) +{ + /* TODO: proper resize needs to be done */ + ehca_err(cq->device, "not implemented yet"); + + return -EFAULT; +} + +int ehca_init_cq_cache(void) +{ + cq_cache = kmem_cache_create("ehca_cache_cq", + sizeof(struct ehca_cq), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!cq_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_cq_cache(void) +{ + if (cq_cache) + kmem_cache_destroy(cq_cache); +} diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c new file mode 100644 index 00000000..818d721f --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -0,0 +1,189 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Event queue handling + * + * Authors: Waleri Fomin + * Khadija Souissi + * Reinhard Ernst + * Heiko J Schick + * Hoang-Nam Nguyen + * + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "ehca_iverbs.h" +#include "ehca_qes.h" +#include "hcp_if.h" +#include "ipz_pt_fn.h" + +int ehca_create_eq(struct ehca_shca *shca, + struct ehca_eq *eq, + const enum ehca_eq_type type, const u32 length) +{ + int ret; + u64 h_ret; + u32 nr_pages; + u32 i; + void *vpage; + struct ib_device *ib_dev = &shca->ib_device; + + spin_lock_init(&eq->spinlock); + spin_lock_init(&eq->irq_spinlock); + eq->is_initialized = 0; + + if (type != EHCA_EQ && type != EHCA_NEQ) { + ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq); + return -EINVAL; + } + if (!length) { + ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq); + return -EINVAL; + } + + h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, + &eq->pf, + type, + length, + &eq->ipz_eq_handle, + &eq->length, + &nr_pages, &eq->ist); + + if (h_ret != H_SUCCESS) { + ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq); + return -EINVAL; + } + + ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages, + EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0); + if (!ret) { + ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq); + goto create_eq_exit1; + } + + for (i = 0; i < nr_pages; i++) { + u64 rpage; + + vpage = ipz_qpageit_get_inc(&eq->ipz_queue); + if (!vpage) + goto create_eq_exit2; + + rpage = virt_to_abs(vpage); + h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, + eq->ipz_eq_handle, + &eq->pf, + 0, 0, rpage, 1); + + if (i == (nr_pages - 1)) { + /* last page */ + vpage = ipz_qpageit_get_inc(&eq->ipz_queue); + if (h_ret != H_SUCCESS || vpage) + goto create_eq_exit2; + } else { + if (h_ret != H_PAGE_REGISTERED) + goto create_eq_exit2; + } + } + + ipz_qeit_reset(&eq->ipz_queue); + + /* register interrupt handlers and initialize work queues */ + if (type == EHCA_EQ) { + tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); + + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq, + 0, "ehca_eq", + (void *)shca); + if (ret < 0) + ehca_err(ib_dev, "Can't map interrupt handler."); + } else if (type == EHCA_NEQ) { + tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); + + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq, + 0, "ehca_neq", + (void *)shca); + if (ret < 0) + ehca_err(ib_dev, "Can't map interrupt handler."); + } + + eq->is_initialized = 1; + + return 0; + +create_eq_exit2: + ipz_queue_dtor(NULL, &eq->ipz_queue); + +create_eq_exit1: + hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + + return -EINVAL; +} + +void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq) +{ + unsigned long flags; + void *eqe; + + spin_lock_irqsave(&eq->spinlock, flags); + eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue); + spin_unlock_irqrestore(&eq->spinlock, flags); + + return eqe; +} + +int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) +{ + unsigned long flags; + u64 h_ret; + + ibmebus_free_irq(eq->ist, (void *)shca); + + spin_lock_irqsave(&shca_list_lock, flags); + eq->is_initialized = 0; + spin_unlock_irqrestore(&shca_list_lock, flags); + + tasklet_kill(&eq->interrupt_task); + + h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't free EQ resources."); + return -EINVAL; + } + ipz_queue_dtor(NULL, &eq->ipz_queue); + + return 0; +} diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c new file mode 100644 index 00000000..9ed4d258 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -0,0 +1,410 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HCA query functions + * + * Authors: Heiko J Schick + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_tools.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +static unsigned int limit_uint(unsigned int value) +{ + return min_t(unsigned int, value, INT_MAX); +} + +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) +{ + int i, ret = 0; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_hca *rblock; + + static const u32 cap_mapping[] = { + IB_DEVICE_RESIZE_MAX_WR, HCA_CAP_WQE_RESIZE, + IB_DEVICE_BAD_PKEY_CNTR, HCA_CAP_BAD_P_KEY_CTR, + IB_DEVICE_BAD_QKEY_CNTR, HCA_CAP_Q_KEY_VIOL_CTR, + IB_DEVICE_RAW_MULTI, HCA_CAP_RAW_PACKET_MCAST, + IB_DEVICE_AUTO_PATH_MIG, HCA_CAP_AUTO_PATH_MIG, + IB_DEVICE_CHANGE_PHY_PORT, HCA_CAP_SQD_RTS_PORT_CHANGE, + IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK, + IB_DEVICE_CURR_QP_STATE_MOD, HCA_CAP_CUR_QP_STATE_MOD, + IB_DEVICE_SHUTDOWN_PORT, HCA_CAP_SHUTDOWN_PORT, + IB_DEVICE_INIT_TYPE, HCA_CAP_INIT_TYPE, + IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT, + }; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query device properties"); + ret = -EINVAL; + goto query_device1; + } + + memset(props, 0, sizeof(struct ib_device_attr)); + props->page_size_cap = shca->hca_cap_mr_pgsize; + props->fw_ver = rblock->hw_ver; + props->max_mr_size = rblock->max_mr_size; + props->vendor_id = rblock->vendor_id >> 8; + props->vendor_part_id = rblock->vendor_part_id >> 16; + props->hw_ver = rblock->hw_ver; + props->max_qp = limit_uint(rblock->max_qp); + props->max_qp_wr = limit_uint(rblock->max_wqes_wq); + props->max_sge = limit_uint(rblock->max_sge); + props->max_sge_rd = limit_uint(rblock->max_sge_rd); + props->max_cq = limit_uint(rblock->max_cq); + props->max_cqe = limit_uint(rblock->max_cqe); + props->max_mr = limit_uint(rblock->max_mr); + props->max_mw = limit_uint(rblock->max_mw); + props->max_pd = limit_uint(rblock->max_pd); + props->max_ah = limit_uint(rblock->max_ah); + props->max_ee = limit_uint(rblock->max_rd_ee_context); + props->max_rdd = limit_uint(rblock->max_rd_domain); + props->max_fmr = limit_uint(rblock->max_mr); + props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp); + props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context); + props->max_res_rd_atom = limit_uint(rblock->max_rr_hca); + props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp); + props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context); + + if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { + props->max_srq = limit_uint(props->max_qp); + props->max_srq_wr = limit_uint(props->max_qp_wr); + props->max_srq_sge = 3; + } + + props->max_pkeys = 16; + /* Some FW versions say 0 here; insert sensible value in that case */ + props->local_ca_ack_delay = rblock->local_ca_ack_delay ? + min_t(u8, rblock->local_ca_ack_delay, 255) : 12; + props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); + props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); + props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); + props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach); + props->max_total_mcast_qp_attach + = limit_uint(rblock->max_total_mcast_qp_attach); + + /* translate device capabilities */ + props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ; + for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2) + if (rblock->hca_cap_indicators & cap_mapping[i + 1]) + props->device_cap_flags |= cap_mapping[i]; + +query_device1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu) +{ + switch (fw_mtu) { + case 0x1: + return IB_MTU_256; + case 0x2: + return IB_MTU_512; + case 0x3: + return IB_MTU_1024; + case 0x4: + return IB_MTU_2048; + case 0x5: + return IB_MTU_4096; + default: + ehca_err(&shca->ib_device, "Unknown MTU size: %x.", + fw_mtu); + return 0; + } +} + +static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) +{ + switch (vl_cap) { + case 0x1: + return 1; + case 0x2: + return 2; + case 0x3: + return 4; + case 0x4: + return 8; + case 0x5: + return 15; + default: + ehca_err(&shca->ib_device, "invalid Vl Capability: %x.", + vl_cap); + return 0; + } +} + +int ehca_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_port *rblock; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_port1; + } + + memset(props, 0, sizeof(struct ib_port_attr)); + + props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu); + props->port_cap_flags = rblock->capability_mask; + props->gid_tbl_len = rblock->gid_tbl_len; + if (rblock->max_msg_sz) + props->max_msg_sz = rblock->max_msg_sz; + else + props->max_msg_sz = 0x1 << 31; + props->bad_pkey_cntr = rblock->bad_pkey_cntr; + props->qkey_viol_cntr = rblock->qkey_viol_cntr; + props->pkey_tbl_len = rblock->pkey_tbl_len; + props->lid = rblock->lid; + props->sm_lid = rblock->sm_lid; + props->lmc = rblock->lmc; + props->sm_sl = rblock->sm_sl; + props->subnet_timeout = rblock->subnet_timeout; + props->init_type_reply = rblock->init_type_reply; + props->max_vl_num = map_number_of_vls(shca, rblock->vl_cap); + + if (rblock->state && rblock->phys_width) { + props->phys_state = rblock->phys_pstate; + props->state = rblock->phys_state; + props->active_width = rblock->phys_width; + props->active_speed = rblock->phys_speed; + } else { + /* old firmware releases don't report physical + * port info, so use default values + */ + props->phys_state = 5; + props->state = rblock->state; + props->active_width = IB_WIDTH_12X; + props->active_speed = IB_SPEED_SDR; + } + +query_port1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +int ehca_query_sma_attr(struct ehca_shca *shca, + u8 port, struct ehca_sma_attr *attr) +{ + int ret = 0; + u64 h_ret; + struct hipz_query_port *rblock; + + rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_sma_attr1; + } + + memset(attr, 0, sizeof(struct ehca_sma_attr)); + + attr->lid = rblock->lid; + attr->lmc = rblock->lmc; + attr->sm_sl = rblock->sm_sl; + attr->sm_lid = rblock->sm_lid; + + attr->pkey_tbl_len = rblock->pkey_tbl_len; + memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys)); + +query_sma_attr1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca; + struct hipz_query_port *rblock; + + shca = container_of(ibdev, struct ehca_shca, ib_device); + if (index > 16) { + ehca_err(&shca->ib_device, "Invalid index: %x.", index); + return -EINVAL; + } + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_pkey1; + } + + memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16)); + +query_pkey1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +int ehca_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_port *rblock; + + if (index < 0 || index > 255) { + ehca_err(&shca->ib_device, "Invalid index: %x.", index); + return -EINVAL; + } + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_gid1; + } + + memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64)); + memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64)); + +query_gid1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +static const u32 allowed_port_caps = ( + IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP | + IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP | + IB_PORT_VENDOR_CLASS_SUP); + +int ehca_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props) +{ + int ret = 0; + struct ehca_shca *shca; + struct hipz_query_port *rblock; + u32 cap; + u64 hret; + + shca = container_of(ibdev, struct ehca_shca, ib_device); + if ((props->set_port_cap_mask | props->clr_port_cap_mask) + & ~allowed_port_caps) { + ehca_err(&shca->ib_device, "Non-changeable bits set in masks " + "set=%x clr=%x allowed=%x", props->set_port_cap_mask, + props->clr_port_cap_mask, allowed_port_caps); + return -EINVAL; + } + + if (mutex_lock_interruptible(&shca->modify_mutex)) + return -ERESTARTSYS; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + ret = -ENOMEM; + goto modify_port1; + } + + hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (hret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto modify_port2; + } + + cap = (rblock->capability_mask | props->set_port_cap_mask) + & ~props->clr_port_cap_mask; + + hret = hipz_h_modify_port(shca->ipz_hca_handle, port, + cap, props->init_type, port_modify_mask); + if (hret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Modify port failed h_ret=%lli", + hret); + ret = -EINVAL; + } + +modify_port2: + ehca_free_fw_ctrlblock(rblock); + +modify_port1: + mutex_unlock(&shca->modify_mutex); + + return ret; +} diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c new file mode 100644 index 00000000..53589000 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -0,0 +1,942 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Functions for EQs, NEQs and interrupts + * + * Authors: Heiko J Schick + * Khadija Souissi + * Hoang-Nam Nguyen + * Joachim Fenkes + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "ehca_iverbs.h" +#include "ehca_tools.h" +#include "hcp_if.h" +#include "hipz_fns.h" +#include "ipz_pt_fn.h" + +#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7) +#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63) +#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63) + +#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7) +#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) +#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) +#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) +#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23) + +#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) +#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) + +static void queue_comp_task(struct ehca_cq *__cq); + +static struct ehca_comp_pool *pool; + +static inline void comp_event_callback(struct ehca_cq *cq) +{ + if (!cq->ib_cq.comp_handler) + return; + + spin_lock(&cq->cb_lock); + cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context); + spin_unlock(&cq->cb_lock); + + return; +} + +static void print_error_data(struct ehca_shca *shca, void *data, + u64 *rblock, int length) +{ + u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); + u64 resource = rblock[1]; + + switch (type) { + case 0x1: /* Queue Pair */ + { + struct ehca_qp *qp = (struct ehca_qp *)data; + + /* only print error data if AER is set */ + if (rblock[6] == 0) + return; + + ehca_err(&shca->ib_device, + "QP 0x%x (resource=%llx) has errors.", + qp->ib_qp.qp_num, resource); + break; + } + case 0x4: /* Completion Queue */ + { + struct ehca_cq *cq = (struct ehca_cq *)data; + + ehca_err(&shca->ib_device, + "CQ 0x%x (resource=%llx) has errors.", + cq->cq_number, resource); + break; + } + default: + ehca_err(&shca->ib_device, + "Unknown error type: %llx on %s.", + type, shca->ib_device.name); + break; + } + + ehca_err(&shca->ib_device, "Error data is available: %llx.", resource); + ehca_err(&shca->ib_device, "EHCA ----- error data begin " + "---------------------------------------------------"); + ehca_dmp(rblock, length, "resource=%llx", resource); + ehca_err(&shca->ib_device, "EHCA ----- error data end " + "----------------------------------------------------"); + + return; +} + +int ehca_error_data(struct ehca_shca *shca, void *data, + u64 resource) +{ + + unsigned long ret; + u64 *rblock; + unsigned long block_count; + + rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); + if (!rblock) { + ehca_err(&shca->ib_device, "Cannot allocate rblock memory."); + ret = -ENOMEM; + goto error_data1; + } + + /* rblock must be 4K aligned and should be 4K large */ + ret = hipz_h_error_data(shca->ipz_hca_handle, + resource, + rblock, + &block_count); + + if (ret == H_R_STATE) + ehca_err(&shca->ib_device, + "No error data is available: %llx.", resource); + else if (ret == H_SUCCESS) { + int length; + + length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]); + + if (length > EHCA_PAGESIZE) + length = EHCA_PAGESIZE; + + print_error_data(shca, data, rblock, length); + } else + ehca_err(&shca->ib_device, + "Error data could not be fetched: %llx", resource); + + ehca_free_fw_ctrlblock(rblock); + +error_data1: + return ret; + +} + +static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, + enum ib_event_type event_type) +{ + struct ib_event event; + + /* PATH_MIG without the QP ever having been armed is false alarm */ + if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) + return; + + event.device = &shca->ib_device; + event.event = event_type; + + if (qp->ext_type == EQPT_SRQ) { + if (!qp->ib_srq.event_handler) + return; + + event.element.srq = &qp->ib_srq; + qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context); + } else { + if (!qp->ib_qp.event_handler) + return; + + event.element.qp = &qp->ib_qp; + qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + } +} + +static void qp_event_callback(struct ehca_shca *shca, u64 eqe, + enum ib_event_type event_type, int fatal) +{ + struct ehca_qp *qp; + u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); + + read_lock(&ehca_qp_idr_lock); + qp = idr_find(&ehca_qp_idr, token); + if (qp) + atomic_inc(&qp->nr_events); + read_unlock(&ehca_qp_idr_lock); + + if (!qp) + return; + + if (fatal) + ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); + + dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ? + IB_EVENT_SRQ_ERR : event_type); + + /* + * eHCA only processes one WQE at a time for SRQ base QPs, + * so the last WQE has been processed as soon as the QP enters + * error state. + */ + if (fatal && qp->ext_type == EQPT_SRQBASE) + dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED); + + if (atomic_dec_and_test(&qp->nr_events)) + wake_up(&qp->wait_completion); + return; +} + +static void cq_event_callback(struct ehca_shca *shca, + u64 eqe) +{ + struct ehca_cq *cq; + u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); + + read_lock(&ehca_cq_idr_lock); + cq = idr_find(&ehca_cq_idr, token); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); + + if (!cq) + return; + + ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); + + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + + return; +} + +static void parse_identifier(struct ehca_shca *shca, u64 eqe) +{ + u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe); + + switch (identifier) { + case 0x02: /* path migrated */ + qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0); + break; + case 0x03: /* communication established */ + qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0); + break; + case 0x04: /* send queue drained */ + qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0); + break; + case 0x05: /* QP error */ + case 0x06: /* QP error */ + qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1); + break; + case 0x07: /* CQ error */ + case 0x08: /* CQ error */ + cq_event_callback(shca, eqe); + break; + case 0x09: /* MRMWPTE error */ + ehca_err(&shca->ib_device, "MRMWPTE error."); + break; + case 0x0A: /* port event */ + ehca_err(&shca->ib_device, "Port event."); + break; + case 0x0B: /* MR access error */ + ehca_err(&shca->ib_device, "MR access error."); + break; + case 0x0C: /* EQ error */ + ehca_err(&shca->ib_device, "EQ error."); + break; + case 0x0D: /* P/Q_Key mismatch */ + ehca_err(&shca->ib_device, "P/Q_Key mismatch."); + break; + case 0x10: /* sampling complete */ + ehca_err(&shca->ib_device, "Sampling complete."); + break; + case 0x11: /* unaffiliated access error */ + ehca_err(&shca->ib_device, "Unaffiliated access error."); + break; + case 0x12: /* path migrating */ + ehca_err(&shca->ib_device, "Path migrating."); + break; + case 0x13: /* interface trace stopped */ + ehca_err(&shca->ib_device, "Interface trace stopped."); + break; + case 0x14: /* first error capture info available */ + ehca_info(&shca->ib_device, "First error capture available"); + break; + case 0x15: /* SRQ limit reached */ + qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0); + break; + default: + ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.", + identifier, shca->ib_device.name); + break; + } + + return; +} + +static void dispatch_port_event(struct ehca_shca *shca, int port_num, + enum ib_event_type type, const char *msg) +{ + struct ib_event event; + + ehca_info(&shca->ib_device, "port %d %s.", port_num, msg); + event.device = &shca->ib_device; + event.event = type; + event.element.port_num = port_num; + ib_dispatch_event(&event); +} + +static void notify_port_conf_change(struct ehca_shca *shca, int port_num) +{ + struct ehca_sma_attr new_attr; + struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr; + + ehca_query_sma_attr(shca, port_num, &new_attr); + + if (new_attr.sm_sl != old_attr->sm_sl || + new_attr.sm_lid != old_attr->sm_lid) + dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE, + "SM changed"); + + if (new_attr.lid != old_attr->lid || + new_attr.lmc != old_attr->lmc) + dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE, + "LID changed"); + + if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len || + memcmp(new_attr.pkeys, old_attr->pkeys, + sizeof(u16) * new_attr.pkey_tbl_len)) + dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE, + "P_Key changed"); + + *old_attr = new_attr; +} + +/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */ +static int replay_modify_qp(struct ehca_sport *sport) +{ + int aqp1_destroyed; + unsigned long flags; + + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + + aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI]; + + if (sport->ibqp_sqp[IB_QPT_SMI]) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); + if (!aqp1_destroyed) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); + + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + + return aqp1_destroyed; +} + +static void parse_ec(struct ehca_shca *shca, u64 eqe) +{ + u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); + u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); + u8 spec_event; + struct ehca_sport *sport = &shca->sport[port - 1]; + + switch (ec) { + case 0x30: /* port availability change */ + if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { + /* only replay modify_qp calls in autodetect mode; + * if AQP1 was destroyed, the port is already down + * again and we can drop the event. + */ + if (ehca_nr_ports < 0) + if (replay_modify_qp(sport)) + break; + + sport->port_state = IB_PORT_ACTIVE; + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + ehca_query_sma_attr(shca, port, &sport->saved_attr); + } else { + sport->port_state = IB_PORT_DOWN; + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); + } + break; + case 0x31: + /* port configuration change + * disruptive change is caused by + * LID, PKEY or SM change + */ + if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) { + ehca_warn(&shca->ib_device, "disruptive port " + "%d configuration change", port); + + sport->port_state = IB_PORT_DOWN; + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); + + sport->port_state = IB_PORT_ACTIVE; + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + ehca_query_sma_attr(shca, port, + &sport->saved_attr); + } else + notify_port_conf_change(shca, port); + break; + case 0x32: /* adapter malfunction */ + ehca_err(&shca->ib_device, "Adapter malfunction."); + break; + case 0x33: /* trace stopped */ + ehca_err(&shca->ib_device, "Traced stopped."); + break; + case 0x34: /* util async event */ + spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe); + if (spec_event == 0x80) /* client reregister required */ + dispatch_port_event(shca, port, + IB_EVENT_CLIENT_REREGISTER, + "client reregister req."); + else + ehca_warn(&shca->ib_device, "Unknown util async " + "event %x on port %x", spec_event, port); + break; + default: + ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", + ec, shca->ib_device.name); + break; + } + + return; +} + +static inline void reset_eq_pending(struct ehca_cq *cq) +{ + u64 CQx_EP; + struct h_galpa gal = cq->galpas.kernel; + + hipz_galpa_store_cq(gal, cqx_ep, 0x0); + CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep)); + + return; +} + +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + tasklet_hi_schedule(&shca->neq.interrupt_task); + + return IRQ_HANDLED; +} + +void ehca_tasklet_neq(unsigned long data) +{ + struct ehca_shca *shca = (struct ehca_shca*)data; + struct ehca_eqe *eqe; + u64 ret; + + eqe = ehca_poll_eq(shca, &shca->neq); + + while (eqe) { + if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) + parse_ec(shca, eqe->entry); + + eqe = ehca_poll_eq(shca, &shca->neq); + } + + ret = hipz_h_reset_event(shca->ipz_hca_handle, + shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL); + + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, "Can't clear notification events."); + + return; +} + +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + tasklet_hi_schedule(&shca->eq.interrupt_task); + + return IRQ_HANDLED; +} + + +static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) +{ + u64 eqe_value; + u32 token; + struct ehca_cq *cq; + + eqe_value = eqe->entry; + ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value); + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { + ehca_dbg(&shca->ib_device, "Got completion event"); + token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); + read_lock(&ehca_cq_idr_lock); + cq = idr_find(&ehca_cq_idr, token); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); + if (cq == NULL) { + ehca_err(&shca->ib_device, + "Invalid eqe for non-existing cq token=%x", + token); + return; + } + reset_eq_pending(cq); + if (ehca_scaling_code) + queue_comp_task(cq); + else { + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + } + } else { + ehca_dbg(&shca->ib_device, "Got non completion event"); + parse_identifier(shca, eqe_value); + } +} + +void ehca_process_eq(struct ehca_shca *shca, int is_irq) +{ + struct ehca_eq *eq = &shca->eq; + struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; + u64 eqe_value, ret; + int eqe_cnt, i; + int eq_empty = 0; + + spin_lock(&eq->irq_spinlock); + if (is_irq) { + const int max_query_cnt = 100; + int query_cnt = 0; + int int_state = 1; + do { + int_state = hipz_h_query_int_state( + shca->ipz_hca_handle, eq->ist); + query_cnt++; + iosync(); + } while (int_state && query_cnt < max_query_cnt); + if (unlikely((query_cnt == max_query_cnt))) + ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x", + int_state, query_cnt); + } + + /* read out all eqes */ + eqe_cnt = 0; + do { + u32 token; + eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq); + if (!eqe_cache[eqe_cnt].eqe) + break; + eqe_value = eqe_cache[eqe_cnt].eqe->entry; + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { + token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); + read_lock(&ehca_cq_idr_lock); + eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); + if (eqe_cache[eqe_cnt].cq) + atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events); + read_unlock(&ehca_cq_idr_lock); + if (!eqe_cache[eqe_cnt].cq) { + ehca_err(&shca->ib_device, + "Invalid eqe for non-existing cq " + "token=%x", token); + continue; + } + } else + eqe_cache[eqe_cnt].cq = NULL; + eqe_cnt++; + } while (eqe_cnt < EHCA_EQE_CACHE_SIZE); + if (!eqe_cnt) { + if (is_irq) + ehca_dbg(&shca->ib_device, + "No eqe found for irq event"); + goto unlock_irq_spinlock; + } else if (!is_irq) { + ret = hipz_h_eoi(eq->ist); + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, + "bad return code EOI -rc = %lld\n", ret); + ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); + } + if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) + ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); + /* enable irq for new packets */ + for (i = 0; i < eqe_cnt; i++) { + if (eq->eqe_cache[i].cq) + reset_eq_pending(eq->eqe_cache[i].cq); + } + /* check eq */ + spin_lock(&eq->spinlock); + eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue)); + spin_unlock(&eq->spinlock); + /* call completion handler for cached eqes */ + for (i = 0; i < eqe_cnt; i++) + if (eq->eqe_cache[i].cq) { + if (ehca_scaling_code) + queue_comp_task(eq->eqe_cache[i].cq); + else { + struct ehca_cq *cq = eq->eqe_cache[i].cq; + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + } + } else { + ehca_dbg(&shca->ib_device, "Got non completion event"); + parse_identifier(shca, eq->eqe_cache[i].eqe->entry); + } + /* poll eq if not empty */ + if (eq_empty) + goto unlock_irq_spinlock; + do { + struct ehca_eqe *eqe; + eqe = ehca_poll_eq(shca, &shca->eq); + if (!eqe) + break; + process_eqe(shca, eqe); + } while (1); + +unlock_irq_spinlock: + spin_unlock(&eq->irq_spinlock); +} + +void ehca_tasklet_eq(unsigned long data) +{ + ehca_process_eq((struct ehca_shca*)data, 1); +} + +static inline int find_next_online_cpu(struct ehca_comp_pool *pool) +{ + int cpu; + unsigned long flags; + + WARN_ON_ONCE(!in_interrupt()); + if (ehca_debug_level >= 3) + ehca_dmp(cpu_online_mask, cpumask_size(), ""); + + spin_lock_irqsave(&pool->last_cpu_lock, flags); + cpu = cpumask_next(pool->last_cpu, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_online_mask); + pool->last_cpu = cpu; + spin_unlock_irqrestore(&pool->last_cpu_lock, flags); + + return cpu; +} + +static void __queue_comp_task(struct ehca_cq *__cq, + struct ehca_cpu_comp_task *cct) +{ + unsigned long flags; + + spin_lock_irqsave(&cct->task_lock, flags); + spin_lock(&__cq->task_lock); + + if (__cq->nr_callbacks == 0) { + __cq->nr_callbacks++; + list_add_tail(&__cq->entry, &cct->cq_list); + cct->cq_jobs++; + wake_up(&cct->wait_queue); + } else + __cq->nr_callbacks++; + + spin_unlock(&__cq->task_lock); + spin_unlock_irqrestore(&cct->task_lock, flags); +} + +static void queue_comp_task(struct ehca_cq *__cq) +{ + int cpu_id; + struct ehca_cpu_comp_task *cct; + int cq_jobs; + unsigned long flags; + + cpu_id = find_next_online_cpu(pool); + BUG_ON(!cpu_online(cpu_id)); + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + BUG_ON(!cct); + + spin_lock_irqsave(&cct->task_lock, flags); + cq_jobs = cct->cq_jobs; + spin_unlock_irqrestore(&cct->task_lock, flags); + if (cq_jobs > 0) { + cpu_id = find_next_online_cpu(pool); + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + BUG_ON(!cct); + } + + __queue_comp_task(__cq, cct); +} + +static void run_comp_task(struct ehca_cpu_comp_task *cct) +{ + struct ehca_cq *cq; + unsigned long flags; + + spin_lock_irqsave(&cct->task_lock, flags); + + while (!list_empty(&cct->cq_list)) { + cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); + spin_unlock_irqrestore(&cct->task_lock, flags); + + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + + spin_lock_irqsave(&cct->task_lock, flags); + spin_lock(&cq->task_lock); + cq->nr_callbacks--; + if (!cq->nr_callbacks) { + list_del_init(cct->cq_list.next); + cct->cq_jobs--; + } + spin_unlock(&cq->task_lock); + } + + spin_unlock_irqrestore(&cct->task_lock, flags); +} + +static int comp_task(void *__cct) +{ + struct ehca_cpu_comp_task *cct = __cct; + int cql_empty; + DECLARE_WAITQUEUE(wait, current); + + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + add_wait_queue(&cct->wait_queue, &wait); + + spin_lock_irq(&cct->task_lock); + cql_empty = list_empty(&cct->cq_list); + spin_unlock_irq(&cct->task_lock); + if (cql_empty) + schedule(); + else + __set_current_state(TASK_RUNNING); + + remove_wait_queue(&cct->wait_queue, &wait); + + spin_lock_irq(&cct->task_lock); + cql_empty = list_empty(&cct->cq_list); + spin_unlock_irq(&cct->task_lock); + if (!cql_empty) + run_comp_task(__cct); + + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + + return 0; +} + +static struct task_struct *create_comp_task(struct ehca_comp_pool *pool, + int cpu) +{ + struct ehca_cpu_comp_task *cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + spin_lock_init(&cct->task_lock); + INIT_LIST_HEAD(&cct->cq_list); + init_waitqueue_head(&cct->wait_queue); + cct->task = kthread_create_on_node(comp_task, cct, cpu_to_node(cpu), + "ehca_comp/%d", cpu); + + return cct->task; +} + +static void destroy_comp_task(struct ehca_comp_pool *pool, + int cpu) +{ + struct ehca_cpu_comp_task *cct; + struct task_struct *task; + unsigned long flags_cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + + spin_lock_irqsave(&cct->task_lock, flags_cct); + + task = cct->task; + cct->task = NULL; + cct->cq_jobs = 0; + + spin_unlock_irqrestore(&cct->task_lock, flags_cct); + + if (task) + kthread_stop(task); +} + +static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + LIST_HEAD(list); + struct ehca_cq *cq; + unsigned long flags_cct; + + spin_lock_irqsave(&cct->task_lock, flags_cct); + + list_splice_init(&cct->cq_list, &list); + + while (!list_empty(&list)) { + cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); + + list_del(&cq->entry); + __queue_comp_task(cq, this_cpu_ptr(pool->cpu_comp_tasks)); + } + + spin_unlock_irqrestore(&cct->task_lock, flags_cct); + +} + +static int __cpuinit comp_pool_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct ehca_cpu_comp_task *cct; + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); + if (!create_comp_task(pool, cpu)) { + ehca_gen_err("Can't create comp_task for cpu: %x", cpu); + return notifier_from_errno(-ENOMEM); + } + break; + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: + ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + kthread_bind(cct->task, cpumask_any(cpu_online_mask)); + destroy_comp_task(pool, cpu); + break; + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu); + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + kthread_bind(cct->task, cpu); + wake_up_process(cct->task); + break; + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu); + break; + case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: + ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu); + destroy_comp_task(pool, cpu); + take_over_work(pool, cpu); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block comp_pool_callback_nb __cpuinitdata = { + .notifier_call = comp_pool_callback, + .priority = 0, +}; + +int ehca_create_comp_pool(void) +{ + int cpu; + struct task_struct *task; + + if (!ehca_scaling_code) + return 0; + + pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL); + if (pool == NULL) + return -ENOMEM; + + spin_lock_init(&pool->last_cpu_lock); + pool->last_cpu = cpumask_any(cpu_online_mask); + + pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); + if (pool->cpu_comp_tasks == NULL) { + kfree(pool); + return -EINVAL; + } + + for_each_online_cpu(cpu) { + task = create_comp_task(pool, cpu); + if (task) { + kthread_bind(task, cpu); + wake_up_process(task); + } + } + + register_hotcpu_notifier(&comp_pool_callback_nb); + + printk(KERN_INFO "eHCA scaling code enabled\n"); + + return 0; +} + +void ehca_destroy_comp_pool(void) +{ + int i; + + if (!ehca_scaling_code) + return; + + unregister_hotcpu_notifier(&comp_pool_callback_nb); + + for_each_online_cpu(i) + destroy_comp_task(pool, i); + + free_percpu(pool->cpu_comp_tasks); + kfree(pool); +} diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h new file mode 100644 index 00000000..3346cb06 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -0,0 +1,77 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Function definitions and structs for EQs, NEQs and interrupts + * + * Authors: Heiko J Schick + * Khadija Souissi + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_IRQ_H +#define __EHCA_IRQ_H + + +struct ehca_shca; + +#include +#include + +int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); + +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id); +void ehca_tasklet_neq(unsigned long data); + +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id); +void ehca_tasklet_eq(unsigned long data); +void ehca_process_eq(struct ehca_shca *shca, int is_irq); + +struct ehca_cpu_comp_task { + wait_queue_head_t wait_queue; + struct list_head cq_list; + struct task_struct *task; + spinlock_t task_lock; + int cq_jobs; +}; + +struct ehca_comp_pool { + struct ehca_cpu_comp_task *cpu_comp_tasks; + int last_cpu; + spinlock_t last_cpu_lock; +}; + +int ehca_create_comp_pool(void); +void ehca_destroy_comp_pool(void); + +#endif diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h new file mode 100644 index 00000000..8f7f282e --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -0,0 +1,212 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Function definitions for internal functions + * + * Authors: Heiko J Schick + * Dietmar Decker + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_IVERBS_H__ +#define __EHCA_IVERBS_H__ + +#include "ehca_classes.h" + +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props); + +int ehca_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); + +int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, + struct ehca_sma_attr *attr); + +int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey); + +int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid); + +int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, + struct ib_port_modify *props); + +struct ib_pd *ehca_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, + struct ib_udata *udata); + +int ehca_dealloc_pd(struct ib_pd *pd); + +struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); + +int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +int ehca_destroy_ah(struct ib_ah *ah); + +struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags); + +struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, u64 *iova_start); + +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int mr_access_flags, + struct ib_udata *udata); + +int ehca_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, int mr_access_flags, u64 *iova_start); + +int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); + +int ehca_dereg_mr(struct ib_mr *mr); + +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd); + +int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind); + +int ehca_dealloc_mw(struct ib_mw *mw); + +struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + +int ehca_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, int list_len, u64 iova); + +int ehca_unmap_fmr(struct list_head *fmr_list); + +int ehca_dealloc_fmr(struct ib_fmr *fmr); + +enum ehca_eq_type { + EHCA_EQ = 0, /* Event Queue */ + EHCA_NEQ /* Notification Event Queue */ +}; + +int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq, + enum ehca_eq_type type, const u32 length); + +int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq); + +void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq); + + +struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, + struct ib_ucontext *context, + struct ib_udata *udata); + +int ehca_destroy_cq(struct ib_cq *cq); + +int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); + +int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); + +int ehca_peek_cq(struct ib_cq *cq, int wc_cnt); + +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags); + +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); + +int ehca_destroy_qp(struct ib_qp *qp); + +int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata); + +int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); + +int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr); + +int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + +int ehca_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + +struct ib_srq *ehca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); + +int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); + +int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); + +int ehca_destroy_srq(struct ib_srq *srq); + +u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp, + struct ib_qp_init_attr *qp_init_attr); + +int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, + struct ib_udata *udata); + +int ehca_dealloc_ucontext(struct ib_ucontext *context); + +int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + +int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad); + +void ehca_poll_eqs(unsigned long data); + +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd); + +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq); + +#ifdef CONFIG_PPC_64K_PAGES +void *ehca_alloc_fw_ctrlblock(gfp_t flags); +void ehca_free_fw_ctrlblock(void *ptr); +#else +#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags)) +#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) +#endif + +void ehca_recover_sqp(struct ib_qp *sqp); + +#endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c new file mode 100644 index 00000000..832e7a7d --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -0,0 +1,1100 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * module start stop, hca detection + * + * Authors: Heiko J Schick + * Hoang-Nam Nguyen + * Joachim Fenkes + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef CONFIG_PPC_64K_PAGES +#include +#endif + +#include +#include +#include "ehca_classes.h" +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "ehca_tools.h" +#include "hcp_if.h" + +#define HCAD_VERSION "0029" + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Christoph Raisch "); +MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); +MODULE_VERSION(HCAD_VERSION); + +static bool ehca_open_aqp1 = 0; +static int ehca_hw_level = 0; +static bool ehca_poll_all_eqs = 1; + +int ehca_debug_level = 0; +int ehca_nr_ports = -1; +bool ehca_use_hp_mr = 0; +int ehca_port_act_time = 30; +int ehca_static_rate = -1; +bool ehca_scaling_code = 0; +int ehca_lock_hcalls = -1; +int ehca_max_cq = -1; +int ehca_max_qp = -1; + +module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO); +module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); +module_param_named(hw_level, ehca_hw_level, int, S_IRUGO); +module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO); +module_param_named(use_hp_mr, ehca_use_hp_mr, bool, S_IRUGO); +module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO); +module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); +module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); +module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); +module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO); +module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); +module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); + +MODULE_PARM_DESC(open_aqp1, + "Open AQP1 on startup (default: no)"); +MODULE_PARM_DESC(debug_level, + "Amount of debug output (0: none (default), 1: traces, " + "2: some dumps, 3: lots)"); +MODULE_PARM_DESC(hw_level, + "Hardware level (0: autosensing (default), " + "0x10..0x14: eHCA, 0x20..0x23: eHCA2)"); +MODULE_PARM_DESC(nr_ports, + "number of connected ports (-1: autodetect (default), " + "1: port one only, 2: two ports)"); +MODULE_PARM_DESC(use_hp_mr, + "Use high performance MRs (default: no)"); +MODULE_PARM_DESC(port_act_time, + "Time to wait for port activation (default: 30 sec)"); +MODULE_PARM_DESC(poll_all_eqs, + "Poll all event queues periodically (default: yes)"); +MODULE_PARM_DESC(static_rate, + "Set permanent static rate (default: no static rate)"); +MODULE_PARM_DESC(scaling_code, + "Enable scaling code (default: no)"); +MODULE_PARM_DESC(lock_hcalls, + "Serialize all hCalls made by the driver " + "(default: autodetect)"); +MODULE_PARM_DESC(number_of_cqs, + "Max number of CQs which can be allocated " + "(default: autodetect)"); +MODULE_PARM_DESC(number_of_qps, + "Max number of QPs which can be allocated " + "(default: autodetect)"); + +DEFINE_RWLOCK(ehca_qp_idr_lock); +DEFINE_RWLOCK(ehca_cq_idr_lock); +DEFINE_IDR(ehca_qp_idr); +DEFINE_IDR(ehca_cq_idr); + +static LIST_HEAD(shca_list); /* list of all registered ehcas */ +DEFINE_SPINLOCK(shca_list_lock); + +static struct timer_list poll_eqs_timer; + +#ifdef CONFIG_PPC_64K_PAGES +static struct kmem_cache *ctblk_cache; + +void *ehca_alloc_fw_ctrlblock(gfp_t flags) +{ + void *ret = kmem_cache_zalloc(ctblk_cache, flags); + if (!ret) + ehca_gen_err("Out of memory for ctblk"); + return ret; +} + +void ehca_free_fw_ctrlblock(void *ptr) +{ + if (ptr) + kmem_cache_free(ctblk_cache, ptr); + +} +#endif + +int ehca2ib_return_code(u64 ehca_rc) +{ + switch (ehca_rc) { + case H_SUCCESS: + return 0; + case H_RESOURCE: /* Resource in use */ + case H_BUSY: + return -EBUSY; + case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ + case H_CONSTRAINED: /* resource constraint */ + case H_NO_MEM: + return -ENOMEM; + default: + return -EINVAL; + } +} + +static int ehca_create_slab_caches(void) +{ + int ret; + + ret = ehca_init_pd_cache(); + if (ret) { + ehca_gen_err("Cannot create PD SLAB cache."); + return ret; + } + + ret = ehca_init_cq_cache(); + if (ret) { + ehca_gen_err("Cannot create CQ SLAB cache."); + goto create_slab_caches2; + } + + ret = ehca_init_qp_cache(); + if (ret) { + ehca_gen_err("Cannot create QP SLAB cache."); + goto create_slab_caches3; + } + + ret = ehca_init_av_cache(); + if (ret) { + ehca_gen_err("Cannot create AV SLAB cache."); + goto create_slab_caches4; + } + + ret = ehca_init_mrmw_cache(); + if (ret) { + ehca_gen_err("Cannot create MR&MW SLAB cache."); + goto create_slab_caches5; + } + + ret = ehca_init_small_qp_cache(); + if (ret) { + ehca_gen_err("Cannot create small queue SLAB cache."); + goto create_slab_caches6; + } + +#ifdef CONFIG_PPC_64K_PAGES + ctblk_cache = kmem_cache_create("ehca_cache_ctblk", + EHCA_PAGESIZE, H_CB_ALIGNMENT, + SLAB_HWCACHE_ALIGN, + NULL); + if (!ctblk_cache) { + ehca_gen_err("Cannot create ctblk SLAB cache."); + ehca_cleanup_small_qp_cache(); + goto create_slab_caches6; + } +#endif + return 0; + +create_slab_caches6: + ehca_cleanup_mrmw_cache(); + +create_slab_caches5: + ehca_cleanup_av_cache(); + +create_slab_caches4: + ehca_cleanup_qp_cache(); + +create_slab_caches3: + ehca_cleanup_cq_cache(); + +create_slab_caches2: + ehca_cleanup_pd_cache(); + + return ret; +} + +static void ehca_destroy_slab_caches(void) +{ + ehca_cleanup_small_qp_cache(); + ehca_cleanup_mrmw_cache(); + ehca_cleanup_av_cache(); + ehca_cleanup_qp_cache(); + ehca_cleanup_cq_cache(); + ehca_cleanup_pd_cache(); +#ifdef CONFIG_PPC_64K_PAGES + if (ctblk_cache) + kmem_cache_destroy(ctblk_cache); +#endif +} + +#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39) +#define EHCA_REVID EHCA_BMASK_IBM(40, 63) + +static struct cap_descr { + u64 mask; + char *descr; +} hca_cap_descr[] = { + { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" }, + { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" }, + { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" }, + { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" }, + { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" }, + { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" }, + { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" }, + { HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" }, + { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" }, + { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" }, + { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" }, + { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" }, + { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" }, + { HCA_CAP_SRQ, "HCA_CAP_SRQ" }, + { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" }, + { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" }, + { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" }, + { HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" }, +}; + +static int ehca_sense_attributes(struct ehca_shca *shca) +{ + int i, ret = 0; + u64 h_ret; + struct hipz_query_hca *rblock; + struct hipz_query_port *port; + const char *loc_code; + + static const u32 pgsize_map[] = { + HCA_CAP_MR_PGSIZE_4K, 0x1000, + HCA_CAP_MR_PGSIZE_64K, 0x10000, + HCA_CAP_MR_PGSIZE_1M, 0x100000, + HCA_CAP_MR_PGSIZE_16M, 0x1000000, + }; + + ehca_gen_dbg("Probing adapter %s...", + shca->ofdev->dev.of_node->full_name); + loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code", + NULL); + if (loc_code) + ehca_gen_dbg(" ... location lode=%s", loc_code); + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_gen_err("Cannot allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock); + if (h_ret != H_SUCCESS) { + ehca_gen_err("Cannot query device properties. h_ret=%lli", + h_ret); + ret = -EPERM; + goto sense_attributes1; + } + + if (ehca_nr_ports == 1) + shca->num_ports = 1; + else + shca->num_ports = (u8)rblock->num_ports; + + ehca_gen_dbg(" ... found %x ports", rblock->num_ports); + + if (ehca_hw_level == 0) { + u32 hcaaver; + u32 revid; + + hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver); + revid = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver); + + ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid); + + if (hcaaver == 1) { + if (revid <= 3) + shca->hw_level = 0x10 | (revid + 1); + else + shca->hw_level = 0x14; + } else if (hcaaver == 2) { + if (revid == 0) + shca->hw_level = 0x21; + else if (revid == 0x10) + shca->hw_level = 0x22; + else if (revid == 0x20 || revid == 0x21) + shca->hw_level = 0x23; + } + + if (!shca->hw_level) { + ehca_gen_warn("unknown hardware version" + " - assuming default level"); + shca->hw_level = 0x22; + } + } else + shca->hw_level = ehca_hw_level; + ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); + + shca->hca_cap = rblock->hca_cap_indicators; + ehca_gen_dbg(" ... HCA capabilities:"); + for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++) + if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) + ehca_gen_dbg(" %s", hca_cap_descr[i].descr); + + /* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is + * a firmware property, so it's valid across all adapters + */ + if (ehca_lock_hcalls == -1) + ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC, + shca->hca_cap); + + /* translate supported MR page sizes; always support 4K */ + shca->hca_cap_mr_pgsize = EHCA_PAGESIZE; + for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2) + if (rblock->memory_page_size_supported & pgsize_map[i]) + shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; + + /* Set maximum number of CQs and QPs to calculate EQ size */ + if (shca->max_num_qps == -1) + shca->max_num_qps = min_t(int, rblock->max_qp, + EHCA_MAX_NUM_QUEUES); + else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) { + ehca_gen_warn("The requested number of QPs is out of range " + "(1 - %i) specified by HW. Value is set to %i", + rblock->max_qp, rblock->max_qp); + shca->max_num_qps = rblock->max_qp; + } + + if (shca->max_num_cqs == -1) + shca->max_num_cqs = min_t(int, rblock->max_cq, + EHCA_MAX_NUM_QUEUES); + else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) { + ehca_gen_warn("The requested number of CQs is out of range " + "(1 - %i) specified by HW. Value is set to %i", + rblock->max_cq, rblock->max_cq); + } + + /* query max MTU from first port -- it's the same for all ports */ + port = (struct hipz_query_port *)rblock; + h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); + if (h_ret != H_SUCCESS) { + ehca_gen_err("Cannot query port properties. h_ret=%lli", + h_ret); + ret = -EPERM; + goto sense_attributes1; + } + + shca->max_mtu = port->max_mtu; + +sense_attributes1: + ehca_free_fw_ctrlblock(rblock); + return ret; +} + +static int init_node_guid(struct ehca_shca *shca) +{ + int ret = 0; + struct hipz_query_hca *rblock; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query device properties"); + ret = -EINVAL; + goto init_node_guid1; + } + + memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64)); + +init_node_guid1: + ehca_free_fw_ctrlblock(rblock); + return ret; +} + +static int ehca_init_device(struct ehca_shca *shca) +{ + int ret; + + ret = init_node_guid(shca); + if (ret) + return ret; + + strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); + shca->ib_device.owner = THIS_MODULE; + + shca->ib_device.uverbs_abi_ver = 8; + shca->ib_device.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); + + shca->ib_device.node_type = RDMA_NODE_IB_CA; + shca->ib_device.phys_port_cnt = shca->num_ports; + shca->ib_device.num_comp_vectors = 1; + shca->ib_device.dma_device = &shca->ofdev->dev; + shca->ib_device.query_device = ehca_query_device; + shca->ib_device.query_port = ehca_query_port; + shca->ib_device.query_gid = ehca_query_gid; + shca->ib_device.query_pkey = ehca_query_pkey; + /* shca->in_device.modify_device = ehca_modify_device */ + shca->ib_device.modify_port = ehca_modify_port; + shca->ib_device.alloc_ucontext = ehca_alloc_ucontext; + shca->ib_device.dealloc_ucontext = ehca_dealloc_ucontext; + shca->ib_device.alloc_pd = ehca_alloc_pd; + shca->ib_device.dealloc_pd = ehca_dealloc_pd; + shca->ib_device.create_ah = ehca_create_ah; + /* shca->ib_device.modify_ah = ehca_modify_ah; */ + shca->ib_device.query_ah = ehca_query_ah; + shca->ib_device.destroy_ah = ehca_destroy_ah; + shca->ib_device.create_qp = ehca_create_qp; + shca->ib_device.modify_qp = ehca_modify_qp; + shca->ib_device.query_qp = ehca_query_qp; + shca->ib_device.destroy_qp = ehca_destroy_qp; + shca->ib_device.post_send = ehca_post_send; + shca->ib_device.post_recv = ehca_post_recv; + shca->ib_device.create_cq = ehca_create_cq; + shca->ib_device.destroy_cq = ehca_destroy_cq; + shca->ib_device.resize_cq = ehca_resize_cq; + shca->ib_device.poll_cq = ehca_poll_cq; + /* shca->ib_device.peek_cq = ehca_peek_cq; */ + shca->ib_device.req_notify_cq = ehca_req_notify_cq; + /* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */ + shca->ib_device.get_dma_mr = ehca_get_dma_mr; + shca->ib_device.reg_phys_mr = ehca_reg_phys_mr; + shca->ib_device.reg_user_mr = ehca_reg_user_mr; + shca->ib_device.query_mr = ehca_query_mr; + shca->ib_device.dereg_mr = ehca_dereg_mr; + shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr; + shca->ib_device.alloc_mw = ehca_alloc_mw; + shca->ib_device.bind_mw = ehca_bind_mw; + shca->ib_device.dealloc_mw = ehca_dealloc_mw; + shca->ib_device.alloc_fmr = ehca_alloc_fmr; + shca->ib_device.map_phys_fmr = ehca_map_phys_fmr; + shca->ib_device.unmap_fmr = ehca_unmap_fmr; + shca->ib_device.dealloc_fmr = ehca_dealloc_fmr; + shca->ib_device.attach_mcast = ehca_attach_mcast; + shca->ib_device.detach_mcast = ehca_detach_mcast; + shca->ib_device.process_mad = ehca_process_mad; + shca->ib_device.mmap = ehca_mmap; + shca->ib_device.dma_ops = &ehca_dma_mapping_ops; + + if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { + shca->ib_device.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + + shca->ib_device.create_srq = ehca_create_srq; + shca->ib_device.modify_srq = ehca_modify_srq; + shca->ib_device.query_srq = ehca_query_srq; + shca->ib_device.destroy_srq = ehca_destroy_srq; + shca->ib_device.post_srq_recv = ehca_post_srq_recv; + } + + return ret; +} + +static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) +{ + struct ehca_sport *sport = &shca->sport[port - 1]; + struct ib_cq *ibcq; + struct ib_qp *ibqp; + struct ib_qp_init_attr qp_init_attr; + int ret; + + if (sport->ibcq_aqp1) { + ehca_err(&shca->ib_device, "AQP1 CQ is already created."); + return -EPERM; + } + + ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), 10, 0); + if (IS_ERR(ibcq)) { + ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); + return PTR_ERR(ibcq); + } + sport->ibcq_aqp1 = ibcq; + + if (sport->ibqp_sqp[IB_QPT_GSI]) { + ehca_err(&shca->ib_device, "AQP1 QP is already created."); + ret = -EPERM; + goto create_aqp1; + } + + memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr)); + qp_init_attr.send_cq = ibcq; + qp_init_attr.recv_cq = ibcq; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.cap.max_send_wr = 100; + qp_init_attr.cap.max_recv_wr = 100; + qp_init_attr.cap.max_send_sge = 2; + qp_init_attr.cap.max_recv_sge = 1; + qp_init_attr.qp_type = IB_QPT_GSI; + qp_init_attr.port_num = port; + qp_init_attr.qp_context = NULL; + qp_init_attr.event_handler = NULL; + qp_init_attr.srq = NULL; + + ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr); + if (IS_ERR(ibqp)) { + ehca_err(&shca->ib_device, "Cannot create AQP1 QP."); + ret = PTR_ERR(ibqp); + goto create_aqp1; + } + sport->ibqp_sqp[IB_QPT_GSI] = ibqp; + + return 0; + +create_aqp1: + ib_destroy_cq(sport->ibcq_aqp1); + return ret; +} + +static int ehca_destroy_aqp1(struct ehca_sport *sport) +{ + int ret; + + ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]); + if (ret) { + ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret); + return ret; + } + + ret = ib_destroy_cq(sport->ibcq_aqp1); + if (ret) + ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", ret); + + return ret; +} + +static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level); +} + +static ssize_t ehca_store_debug_level(struct device_driver *ddp, + const char *buf, size_t count) +{ + int value = (*buf) - '0'; + if (value >= 0 && value <= 9) + ehca_debug_level = value; + return 1; +} + +static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR, + ehca_show_debug_level, ehca_store_debug_level); + +static struct attribute *ehca_drv_attrs[] = { + &driver_attr_debug_level.attr, + NULL +}; + +static struct attribute_group ehca_drv_attr_grp = { + .attrs = ehca_drv_attrs +}; + +static const struct attribute_group *ehca_drv_attr_groups[] = { + &ehca_drv_attr_grp, + NULL, +}; + +#define EHCA_RESOURCE_ATTR(name) \ +static ssize_t ehca_show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct ehca_shca *shca; \ + struct hipz_query_hca *rblock; \ + int data; \ + \ + shca = dev_get_drvdata(dev); \ + \ + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \ + if (!rblock) { \ + dev_err(dev, "Can't allocate rblock memory.\n"); \ + return 0; \ + } \ + \ + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \ + dev_err(dev, "Can't query device properties\n"); \ + ehca_free_fw_ctrlblock(rblock); \ + return 0; \ + } \ + \ + data = rblock->name; \ + ehca_free_fw_ctrlblock(rblock); \ + \ + if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \ + return snprintf(buf, 256, "1\n"); \ + else \ + return snprintf(buf, 256, "%d\n", data); \ + \ +} \ +static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL); + +EHCA_RESOURCE_ATTR(num_ports); +EHCA_RESOURCE_ATTR(hw_ver); +EHCA_RESOURCE_ATTR(max_eq); +EHCA_RESOURCE_ATTR(cur_eq); +EHCA_RESOURCE_ATTR(max_cq); +EHCA_RESOURCE_ATTR(cur_cq); +EHCA_RESOURCE_ATTR(max_qp); +EHCA_RESOURCE_ATTR(cur_qp); +EHCA_RESOURCE_ATTR(max_mr); +EHCA_RESOURCE_ATTR(cur_mr); +EHCA_RESOURCE_ATTR(max_mw); +EHCA_RESOURCE_ATTR(cur_mw); +EHCA_RESOURCE_ATTR(max_pd); +EHCA_RESOURCE_ATTR(max_ah); + +static ssize_t ehca_show_adapter_handle(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ehca_shca *shca = dev_get_drvdata(dev); + + return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle); + +} +static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); + +static struct attribute *ehca_dev_attrs[] = { + &dev_attr_adapter_handle.attr, + &dev_attr_num_ports.attr, + &dev_attr_hw_ver.attr, + &dev_attr_max_eq.attr, + &dev_attr_cur_eq.attr, + &dev_attr_max_cq.attr, + &dev_attr_cur_cq.attr, + &dev_attr_max_qp.attr, + &dev_attr_cur_qp.attr, + &dev_attr_max_mr.attr, + &dev_attr_cur_mr.attr, + &dev_attr_max_mw.attr, + &dev_attr_cur_mw.attr, + &dev_attr_max_pd.attr, + &dev_attr_max_ah.attr, + NULL +}; + +static struct attribute_group ehca_dev_attr_grp = { + .attrs = ehca_dev_attrs +}; + +static int __devinit ehca_probe(struct platform_device *dev, + const struct of_device_id *id) +{ + struct ehca_shca *shca; + const u64 *handle; + struct ib_pd *ibpd; + int ret, i, eq_size; + unsigned long flags; + + handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL); + if (!handle) { + ehca_gen_err("Cannot get eHCA handle for adapter: %s.", + dev->dev.of_node->full_name); + return -ENODEV; + } + + if (!(*handle)) { + ehca_gen_err("Wrong eHCA handle for adapter: %s.", + dev->dev.of_node->full_name); + return -ENODEV; + } + + shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca)); + if (!shca) { + ehca_gen_err("Cannot allocate shca memory."); + return -ENOMEM; + } + + mutex_init(&shca->modify_mutex); + atomic_set(&shca->num_cqs, 0); + atomic_set(&shca->num_qps, 0); + shca->max_num_qps = ehca_max_qp; + shca->max_num_cqs = ehca_max_cq; + + for (i = 0; i < ARRAY_SIZE(shca->sport); i++) + spin_lock_init(&shca->sport[i].mod_sqp_lock); + + shca->ofdev = dev; + shca->ipz_hca_handle.handle = *handle; + dev_set_drvdata(&dev->dev, shca); + + ret = ehca_sense_attributes(shca); + if (ret < 0) { + ehca_gen_err("Cannot sense eHCA attributes."); + goto probe1; + } + + ret = ehca_init_device(shca); + if (ret) { + ehca_gen_err("Cannot init ehca device struct"); + goto probe1; + } + + eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps; + /* create event queues */ + ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size); + if (ret) { + ehca_err(&shca->ib_device, "Cannot create EQ."); + goto probe1; + } + + ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513); + if (ret) { + ehca_err(&shca->ib_device, "Cannot create NEQ."); + goto probe3; + } + + /* create internal protection domain */ + ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL); + if (IS_ERR(ibpd)) { + ehca_err(&shca->ib_device, "Cannot create internal PD."); + ret = PTR_ERR(ibpd); + goto probe4; + } + + shca->pd = container_of(ibpd, struct ehca_pd, ib_pd); + shca->pd->ib_pd.device = &shca->ib_device; + + /* create internal max MR */ + ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr); + + if (ret) { + ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i", + ret); + goto probe5; + } + + ret = ib_register_device(&shca->ib_device, NULL); + if (ret) { + ehca_err(&shca->ib_device, + "ib_register_device() failed ret=%i", ret); + goto probe6; + } + + /* create AQP1 for port 1 */ + if (ehca_open_aqp1 == 1) { + shca->sport[0].port_state = IB_PORT_DOWN; + ret = ehca_create_aqp1(shca, 1); + if (ret) { + ehca_err(&shca->ib_device, + "Cannot create AQP1 for port 1."); + goto probe7; + } + } + + /* create AQP1 for port 2 */ + if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) { + shca->sport[1].port_state = IB_PORT_DOWN; + ret = ehca_create_aqp1(shca, 2); + if (ret) { + ehca_err(&shca->ib_device, + "Cannot create AQP1 for port 2."); + goto probe8; + } + } + + ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp); + if (ret) /* only complain; we can live without attributes */ + ehca_err(&shca->ib_device, + "Cannot create device attributes ret=%d", ret); + + spin_lock_irqsave(&shca_list_lock, flags); + list_add(&shca->shca_list, &shca_list); + spin_unlock_irqrestore(&shca_list_lock, flags); + + return 0; + +probe8: + ret = ehca_destroy_aqp1(&shca->sport[0]); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy AQP1 for port 1. ret=%i", ret); + +probe7: + ib_unregister_device(&shca->ib_device); + +probe6: + ret = ehca_dereg_internal_maxmr(shca); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal MR. ret=%x", ret); + +probe5: + ret = ehca_dealloc_pd(&shca->pd->ib_pd); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal PD. ret=%x", ret); + +probe4: + ret = ehca_destroy_eq(shca, &shca->neq); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy NEQ. ret=%x", ret); + +probe3: + ret = ehca_destroy_eq(shca, &shca->eq); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy EQ. ret=%x", ret); + +probe1: + ib_dealloc_device(&shca->ib_device); + + return -EINVAL; +} + +static int __devexit ehca_remove(struct platform_device *dev) +{ + struct ehca_shca *shca = dev_get_drvdata(&dev->dev); + unsigned long flags; + int ret; + + sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp); + + if (ehca_open_aqp1 == 1) { + int i; + for (i = 0; i < shca->num_ports; i++) { + ret = ehca_destroy_aqp1(&shca->sport[i]); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy AQP1 for port %x " + "ret=%i", ret, i); + } + } + + ib_unregister_device(&shca->ib_device); + + ret = ehca_dereg_internal_maxmr(shca); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal MR. ret=%i", ret); + + ret = ehca_dealloc_pd(&shca->pd->ib_pd); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal PD. ret=%i", ret); + + ret = ehca_destroy_eq(shca, &shca->eq); + if (ret) + ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret); + + ret = ehca_destroy_eq(shca, &shca->neq); + if (ret) + ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%i", ret); + + ib_dealloc_device(&shca->ib_device); + + spin_lock_irqsave(&shca_list_lock, flags); + list_del(&shca->shca_list); + spin_unlock_irqrestore(&shca_list_lock, flags); + + return ret; +} + +static struct of_device_id ehca_device_table[] = +{ + { + .name = "lhca", + .compatible = "IBM,lhca", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, ehca_device_table); + +static struct of_platform_driver ehca_driver = { + .probe = ehca_probe, + .remove = ehca_remove, + .driver = { + .name = "ehca", + .owner = THIS_MODULE, + .groups = ehca_drv_attr_groups, + .of_match_table = ehca_device_table, + }, +}; + +void ehca_poll_eqs(unsigned long data) +{ + struct ehca_shca *shca; + + spin_lock(&shca_list_lock); + list_for_each_entry(shca, &shca_list, shca_list) { + if (shca->eq.is_initialized) { + /* call deadman proc only if eq ptr does not change */ + struct ehca_eq *eq = &shca->eq; + int max = 3; + volatile u64 q_ofs, q_ofs2; + unsigned long flags; + spin_lock_irqsave(&eq->spinlock, flags); + q_ofs = eq->ipz_queue.current_q_offset; + spin_unlock_irqrestore(&eq->spinlock, flags); + do { + spin_lock_irqsave(&eq->spinlock, flags); + q_ofs2 = eq->ipz_queue.current_q_offset; + spin_unlock_irqrestore(&eq->spinlock, flags); + max--; + } while (q_ofs == q_ofs2 && max > 0); + if (q_ofs == q_ofs2) + ehca_process_eq(shca, 0); + } + } + mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ)); + spin_unlock(&shca_list_lock); +} + +static int ehca_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + static unsigned long ehca_dmem_warn_time; + unsigned long flags; + + switch (action) { + case MEM_CANCEL_OFFLINE: + case MEM_CANCEL_ONLINE: + case MEM_ONLINE: + case MEM_OFFLINE: + return NOTIFY_OK; + case MEM_GOING_ONLINE: + case MEM_GOING_OFFLINE: + /* only ok if no hca is attached to the lpar */ + spin_lock_irqsave(&shca_list_lock, flags); + if (list_empty(&shca_list)) { + spin_unlock_irqrestore(&shca_list_lock, flags); + return NOTIFY_OK; + } else { + spin_unlock_irqrestore(&shca_list_lock, flags); + if (printk_timed_ratelimit(&ehca_dmem_warn_time, + 30 * 1000)) + ehca_gen_err("DMEM operations are not allowed" + "in conjunction with eHCA"); + return NOTIFY_BAD; + } + } + return NOTIFY_OK; +} + +static struct notifier_block ehca_mem_nb = { + .notifier_call = ehca_mem_notifier, +}; + +static int __init ehca_module_init(void) +{ + int ret; + + printk(KERN_INFO "eHCA Infiniband Device Driver " + "(Version " HCAD_VERSION ")\n"); + + ret = ehca_create_comp_pool(); + if (ret) { + ehca_gen_err("Cannot create comp pool."); + return ret; + } + + ret = ehca_create_slab_caches(); + if (ret) { + ehca_gen_err("Cannot create SLAB caches"); + ret = -ENOMEM; + goto module_init1; + } + + ret = ehca_create_busmap(); + if (ret) { + ehca_gen_err("Cannot create busmap."); + goto module_init2; + } + + ret = ibmebus_register_driver(&ehca_driver); + if (ret) { + ehca_gen_err("Cannot register eHCA device driver"); + ret = -EINVAL; + goto module_init3; + } + + ret = register_memory_notifier(&ehca_mem_nb); + if (ret) { + ehca_gen_err("Failed registering memory add/remove notifier"); + goto module_init4; + } + + if (ehca_poll_all_eqs != 1) { + ehca_gen_err("WARNING!!!"); + ehca_gen_err("It is possible to lose interrupts."); + } else { + init_timer(&poll_eqs_timer); + poll_eqs_timer.function = ehca_poll_eqs; + poll_eqs_timer.expires = jiffies + HZ; + add_timer(&poll_eqs_timer); + } + + return 0; + +module_init4: + ibmebus_unregister_driver(&ehca_driver); + +module_init3: + ehca_destroy_busmap(); + +module_init2: + ehca_destroy_slab_caches(); + +module_init1: + ehca_destroy_comp_pool(); + return ret; +}; + +static void __exit ehca_module_exit(void) +{ + if (ehca_poll_all_eqs == 1) + del_timer_sync(&poll_eqs_timer); + + ibmebus_unregister_driver(&ehca_driver); + + unregister_memory_notifier(&ehca_mem_nb); + + ehca_destroy_busmap(); + + ehca_destroy_slab_caches(); + + ehca_destroy_comp_pool(); + + idr_destroy(&ehca_cq_idr); + idr_destroy(&ehca_qp_idr); +}; + +module_init(ehca_module_init); +module_exit(ehca_module_exit); diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c new file mode 100644 index 00000000..120aedf9 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_mcast.c @@ -0,0 +1,131 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * mcast functions + * + * Authors: Khadija Souissi + * Waleri Fomin + * Reinhard Ernst + * Hoang-Nam Nguyen + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +#define MAX_MC_LID 0xFFFE +#define MIN_MC_LID 0xC000 /* Multicast limits */ +#define EHCA_VALID_MULTICAST_GID(gid) ((gid)[0] == 0xFF) +#define EHCA_VALID_MULTICAST_LID(lid) \ + (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID)) + +int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + ib_device); + union ib_gid my_gid; + u64 subnet_prefix, interface_id, h_ret; + + if (ibqp->qp_type != IB_QPT_UD) { + ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type); + return -EINVAL; + } + + if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { + ehca_err(ibqp->device, "invalid mulitcast gid"); + return -EINVAL; + } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { + ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); + return -EINVAL; + } + + memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid)); + + subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); + interface_id = be64_to_cpu(my_gid.global.interface_id); + h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + my_qp->galpas.kernel, + lid, subnet_prefix, interface_id); + if (h_ret != H_SUCCESS) + ehca_err(ibqp->device, + "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed " + "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); + + return ehca2ib_return_code(h_ret); +} + +int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(ibqp->pd->device, + struct ehca_shca, ib_device); + union ib_gid my_gid; + u64 subnet_prefix, interface_id, h_ret; + + if (ibqp->qp_type != IB_QPT_UD) { + ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type); + return -EINVAL; + } + + if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { + ehca_err(ibqp->device, "invalid mulitcast gid"); + return -EINVAL; + } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { + ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); + return -EINVAL; + } + + memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid)); + + subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); + interface_id = be64_to_cpu(my_gid.global.interface_id); + h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + my_qp->galpas.kernel, + lid, subnet_prefix, interface_id); + if (h_ret != H_SUCCESS) + ehca_err(ibqp->device, + "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed " + "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); + + return ehca2ib_return_code(h_ret); +} diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c new file mode 100644 index 00000000..b781b2cb --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -0,0 +1,2664 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * MR/MW functions + * + * Authors: Dietmar Decker + * Christoph Raisch + * Hoang-Nam Nguyen + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "hcp_if.h" +#include "hipz_hw.h" + +#define NUM_CHUNKS(length, chunk_size) \ + (((length) + (chunk_size - 1)) / (chunk_size)) + +/* max number of rpages (per hcall register_rpages) */ +#define MAX_RPAGES 512 + +/* DMEM toleration management */ +#define EHCA_SECTSHIFT SECTION_SIZE_BITS +#define EHCA_SECTSIZE (1UL << EHCA_SECTSHIFT) +#define EHCA_HUGEPAGESHIFT 34 +#define EHCA_HUGEPAGE_SIZE (1UL << EHCA_HUGEPAGESHIFT) +#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT) +#define EHCA_INVAL_ADDR 0xFFFFFFFFFFFFFFFFULL +#define EHCA_DIR_INDEX_SHIFT 13 /* 8k Entries in 64k block */ +#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2) +#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT) +#define EHCA_TOP_MAP_SIZE (0x10000) /* currently fixed map size */ +#define EHCA_DIR_MAP_SIZE (0x10000) +#define EHCA_ENT_MAP_SIZE (0x10000) +#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1) + +static unsigned long ehca_mr_len; + +/* + * Memory map data structures + */ +struct ehca_dir_bmap { + u64 ent[EHCA_MAP_ENTRIES]; +}; +struct ehca_top_bmap { + struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES]; +}; +struct ehca_bmap { + struct ehca_top_bmap *top[EHCA_MAP_ENTRIES]; +}; + +static struct ehca_bmap *ehca_bmap; + +static struct kmem_cache *mr_cache; +static struct kmem_cache *mw_cache; + +enum ehca_mr_pgsize { + EHCA_MR_PGSIZE4K = 0x1000L, + EHCA_MR_PGSIZE64K = 0x10000L, + EHCA_MR_PGSIZE1M = 0x100000L, + EHCA_MR_PGSIZE16M = 0x1000000L +}; + +#define EHCA_MR_PGSHIFT4K 12 +#define EHCA_MR_PGSHIFT64K 16 +#define EHCA_MR_PGSHIFT1M 20 +#define EHCA_MR_PGSHIFT16M 24 + +static u64 ehca_map_vaddr(void *caddr); + +static u32 ehca_encode_hwpage_size(u32 pgsize) +{ + int log = ilog2(pgsize); + WARN_ON(log < 12 || log > 24 || log & 3); + return (log - 12) / 4; +} + +static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) +{ + return rounddown_pow_of_two(shca->hca_cap_mr_pgsize); +} + +static struct ehca_mr *ehca_mr_new(void) +{ + struct ehca_mr *me; + + me = kmem_cache_zalloc(mr_cache, GFP_KERNEL); + if (me) + spin_lock_init(&me->mrlock); + else + ehca_gen_err("alloc failed"); + + return me; +} + +static void ehca_mr_delete(struct ehca_mr *me) +{ + kmem_cache_free(mr_cache, me); +} + +static struct ehca_mw *ehca_mw_new(void) +{ + struct ehca_mw *me; + + me = kmem_cache_zalloc(mw_cache, GFP_KERNEL); + if (me) + spin_lock_init(&me->mwlock); + else + ehca_gen_err("alloc failed"); + + return me; +} + +static void ehca_mw_delete(struct ehca_mw *me) +{ + kmem_cache_free(mw_cache, me); +} + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) +{ + struct ib_mr *ib_mr; + int ret; + struct ehca_mr *e_maxmr; + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + + if (shca->maxmr) { + e_maxmr = ehca_mr_new(); + if (!e_maxmr) { + ehca_err(&shca->ib_device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto get_dma_mr_exit0; + } + + ret = ehca_reg_maxmr(shca, e_maxmr, + (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)), + mr_access_flags, e_pd, + &e_maxmr->ib.ib_mr.lkey, + &e_maxmr->ib.ib_mr.rkey); + if (ret) { + ehca_mr_delete(e_maxmr); + ib_mr = ERR_PTR(ret); + goto get_dma_mr_exit0; + } + ib_mr = &e_maxmr->ib.ib_mr; + } else { + ehca_err(&shca->ib_device, "no internal max-MR exist!"); + ib_mr = ERR_PTR(-EINVAL); + goto get_dma_mr_exit0; + } + +get_dma_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x", + PTR_ERR(ib_mr), pd, mr_access_flags); + return ib_mr; +} /* end ehca_get_dma_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + struct ib_mr *ib_mr; + int ret; + struct ehca_mr *e_mr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + + u64 size; + + if ((num_phys_buf <= 0) || !phys_buf_array) { + ehca_err(pd->device, "bad input values: num_phys_buf=%x " + "phys_buf_array=%p", num_phys_buf, phys_buf_array); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + + /* check physical buffer list and calculate size */ + ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf, + iova_start, &size); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit0; + } + if ((size == 0) || + (((u64)iova_start + size) < (u64)iova_start)) { + ehca_err(pd->device, "bad input values: size=%llx iova_start=%p", + size, iova_start); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(pd->device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto reg_phys_mr_exit0; + } + + /* register MR on HCA */ + if (ehca_mr_is_maxmr(size, iova_start)) { + e_mr->flags |= EHCA_MR_FLAG_MAXMR; + ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags, + e_pd, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit1; + } + } else { + struct ehca_mr_pginfo pginfo; + u32 num_kpages; + u32 num_hwpages; + u64 hw_pgsize; + + num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size, + PAGE_SIZE); + /* for kernel space we try most possible pgsize */ + hw_pgsize = ehca_get_max_hwpage_size(shca); + num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size, + hw_pgsize); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.hwpage_size = hw_pgsize; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = + ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, + e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit1; + } + } + + /* successful registration of all pages */ + return &e_mr->ib.ib_mr; + +reg_phys_mr_exit1: + ehca_mr_delete(e_mr); +reg_phys_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p " + "num_phys_buf=%x mr_access_flags=%x iova_start=%p", + PTR_ERR(ib_mr), pd, phys_buf_array, + num_phys_buf, mr_access_flags, iova_start); + return ib_mr; +} /* end ehca_reg_phys_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int mr_access_flags, + struct ib_udata *udata) +{ + struct ib_mr *ib_mr; + struct ehca_mr *e_mr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_mr_pginfo pginfo; + int ret, page_shift; + u32 num_kpages; + u32 num_hwpages; + u64 hwpage_size; + + if (!pd) { + ehca_gen_err("bad pd=%p", pd); + return ERR_PTR(-EFAULT); + } + + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + + if (length == 0 || virt + length < virt) { + ehca_err(pd->device, "bad input values: length=%llx " + "virt_base=%llx", length, virt); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(pd->device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto reg_user_mr_exit0; + } + + e_mr->umem = ib_umem_get(pd->uobject->context, start, length, + mr_access_flags, 0); + if (IS_ERR(e_mr->umem)) { + ib_mr = (void *)e_mr->umem; + goto reg_user_mr_exit1; + } + + if (e_mr->umem->page_size != PAGE_SIZE) { + ehca_err(pd->device, "page size not supported, " + "e_mr->umem->page_size=%x", e_mr->umem->page_size); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit2; + } + + /* determine number of MR pages */ + num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); + /* select proper hw_pgsize */ + page_shift = PAGE_SHIFT; + if (e_mr->umem->hugetlb) { + /* determine page_shift, clamp between 4K and 16M */ + page_shift = (fls64(length - 1) + 3) & ~3; + page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K), + EHCA_MR_PGSHIFT16M); + } + hwpage_size = 1UL << page_shift; + + /* now that we have the desired page size, shift until it's + * supported, too. 4K is always supported, so this terminates. + */ + while (!(hwpage_size & shca->hca_cap_mr_pgsize)) + hwpage_size >>= 4; + +reg_user_mr_fallback: + num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size); + /* register MR on HCA */ + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_USER; + pginfo.hwpage_size = hwpage_size; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.usr.region = e_mr->umem; + pginfo.next_hwpage = e_mr->umem->offset / hwpage_size; + pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk, + (&e_mr->umem->chunk_list), + list); + + ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, + e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); + if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) { + ehca_warn(pd->device, "failed to register mr " + "with hwpage_size=%llx", hwpage_size); + ehca_info(pd->device, "try to register mr with " + "kpage_size=%lx", PAGE_SIZE); + /* + * this means kpages are not contiguous for a hw page + * try kernel page size as fallback solution + */ + hwpage_size = PAGE_SIZE; + goto reg_user_mr_fallback; + } + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_user_mr_exit2; + } + + /* successful registration of all pages */ + return &e_mr->ib.ib_mr; + +reg_user_mr_exit2: + ib_umem_release(e_mr->umem); +reg_user_mr_exit1: + ehca_mr_delete(e_mr); +reg_user_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p", + PTR_ERR(ib_mr), pd, mr_access_flags, udata); + return ib_mr; +} /* end ehca_reg_user_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + int ret; + + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + u64 new_size; + u64 *new_start; + u32 new_acl; + struct ehca_pd *new_pd; + u32 tmp_lkey, tmp_rkey; + unsigned long sl_flags; + u32 num_kpages = 0; + u32 num_hwpages = 0; + struct ehca_mr_pginfo pginfo; + + if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) { + /* TODO not supported, because PHYP rereg hCall needs pages */ + ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not " + "supported yet, mr_rereg_mask=%x", mr_rereg_mask); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + if (mr_rereg_mask & IB_MR_REREG_PD) { + if (!pd) { + ehca_err(mr->device, "rereg with bad pd, pd=%p " + "mr_rereg_mask=%x", pd, mr_rereg_mask); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + } + + if ((mr_rereg_mask & + ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) || + (mr_rereg_mask == 0)) { + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + /* check other parameters */ + if (e_mr == shca->maxmr) { + /* should be impossible, however reject to be sure */ + ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p " + "shca->maxmr=%p mr->lkey=%x", + mr, shca->maxmr, mr->lkey); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */ + if (e_mr->flags & EHCA_MR_FLAG_FMR) { + ehca_err(mr->device, "not supported for FMR, mr=%p " + "flags=%x", mr, e_mr->flags); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + if (!phys_buf_array || num_phys_buf <= 0) { + ehca_err(mr->device, "bad input values mr_rereg_mask=%x" + " phys_buf_array=%p num_phys_buf=%x", + mr_rereg_mask, phys_buf_array, num_phys_buf); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + } + if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */ + (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(mr->device, "bad input values: mr_rereg_mask=%x " + "mr_access_flags=%x", mr_rereg_mask, mr_access_flags); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + /* set requested values dependent on rereg request */ + spin_lock_irqsave(&e_mr->mrlock, sl_flags); + new_start = e_mr->start; + new_size = e_mr->size; + new_acl = e_mr->acl; + new_pd = container_of(mr->pd, struct ehca_pd, ib_pd); + + if (mr_rereg_mask & IB_MR_REREG_TRANS) { + u64 hw_pgsize = ehca_get_max_hwpage_size(shca); + + new_start = iova_start; /* change address */ + /* check physical buffer list and calculate size */ + ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, + num_phys_buf, iova_start, + &new_size); + if (ret) + goto rereg_phys_mr_exit1; + if ((new_size == 0) || + (((u64)iova_start + new_size) < (u64)iova_start)) { + ehca_err(mr->device, "bad input values: new_size=%llx " + "iova_start=%p", new_size, iova_start); + ret = -EINVAL; + goto rereg_phys_mr_exit1; + } + num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) + + new_size, PAGE_SIZE); + num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) + + new_size, hw_pgsize); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.hwpage_size = hw_pgsize; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = + ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; + } + if (mr_rereg_mask & IB_MR_REREG_ACCESS) + new_acl = mr_access_flags; + if (mr_rereg_mask & IB_MR_REREG_PD) + new_pd = container_of(pd, struct ehca_pd, ib_pd); + + ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl, + new_pd, &pginfo, &tmp_lkey, &tmp_rkey); + if (ret) + goto rereg_phys_mr_exit1; + + /* successful reregistration */ + if (mr_rereg_mask & IB_MR_REREG_PD) + mr->pd = pd; + mr->lkey = tmp_lkey; + mr->rkey = tmp_rkey; + +rereg_phys_mr_exit1: + spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); +rereg_phys_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p " + "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x " + "iova_start=%p", + ret, mr, mr_rereg_mask, pd, phys_buf_array, + num_phys_buf, mr_access_flags, iova_start); + return ret; +} /* end ehca_rereg_phys_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + unsigned long sl_flags; + struct ehca_mr_hipzout_parms hipzout; + + if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " + "e_mr->flags=%x", mr, e_mr, e_mr->flags); + ret = -EINVAL; + goto query_mr_exit0; + } + + memset(mr_attr, 0, sizeof(struct ib_mr_attr)); + spin_lock_irqsave(&e_mr->mrlock, sl_flags); + + h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p " + "hca_hndl=%llx mr_hndl=%llx lkey=%x", + h_ret, mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, mr->lkey); + ret = ehca2ib_return_code(h_ret); + goto query_mr_exit1; + } + mr_attr->pd = mr->pd; + mr_attr->device_virt_addr = hipzout.vaddr; + mr_attr->size = hipzout.len; + mr_attr->lkey = hipzout.lkey; + mr_attr->rkey = hipzout.rkey; + ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); + +query_mr_exit1: + spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); +query_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p", + ret, mr, mr_attr); + return ret; +} /* end ehca_query_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dereg_mr(struct ib_mr *mr) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + + if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " + "e_mr->flags=%x", mr, e_mr, e_mr->flags); + ret = -EINVAL; + goto dereg_mr_exit0; + } else if (e_mr == shca->maxmr) { + /* should be impossible, however reject to be sure */ + ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p " + "shca->maxmr=%p mr->lkey=%x", + mr, shca->maxmr, mr->lkey); + ret = -EINVAL; + goto dereg_mr_exit0; + } + + /* TODO: BUSY: MR still has bound window(s) */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p " + "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x", + h_ret, shca, e_mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, mr->lkey); + ret = ehca2ib_return_code(h_ret); + goto dereg_mr_exit0; + } + + if (e_mr->umem) + ib_umem_release(e_mr->umem); + + /* successful deregistration */ + ehca_mr_delete(e_mr); + +dereg_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%i mr=%p", ret, mr); + return ret; +} /* end ehca_dereg_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) +{ + struct ib_mw *ib_mw; + u64 h_ret; + struct ehca_mw *e_mw; + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_mw_hipzout_parms hipzout; + + e_mw = ehca_mw_new(); + if (!e_mw) { + ib_mw = ERR_PTR(-ENOMEM); + goto alloc_mw_exit0; + } + + h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw, + e_pd->fw_pd, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli " + "shca=%p hca_hndl=%llx mw=%p", + h_ret, shca, shca->ipz_hca_handle.handle, e_mw); + ib_mw = ERR_PTR(ehca2ib_return_code(h_ret)); + goto alloc_mw_exit1; + } + /* successful MW allocation */ + e_mw->ipz_mw_handle = hipzout.handle; + e_mw->ib_mw.rkey = hipzout.rkey; + return &e_mw->ib_mw; + +alloc_mw_exit1: + ehca_mw_delete(e_mw); +alloc_mw_exit0: + if (IS_ERR(ib_mw)) + ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd); + return ib_mw; +} /* end ehca_alloc_mw() */ + +/*----------------------------------------------------------------------*/ + +int ehca_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + /* TODO: not supported up to now */ + ehca_gen_err("bind MW currently not supported by HCAD"); + + return -EPERM; +} /* end ehca_bind_mw() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dealloc_mw(struct ib_mw *mw) +{ + u64 h_ret; + struct ehca_shca *shca = + container_of(mw->device, struct ehca_shca, ib_device); + struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw); + + h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw); + if (h_ret != H_SUCCESS) { + ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p " + "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx", + h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, + e_mw->ipz_mw_handle.handle); + return ehca2ib_return_code(h_ret); + } + /* successful deallocation */ + ehca_mw_delete(e_mw); + return 0; +} /* end ehca_dealloc_mw() */ + +/*----------------------------------------------------------------------*/ + +struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr) +{ + struct ib_fmr *ib_fmr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_mr *e_fmr; + int ret; + u32 tmp_lkey, tmp_rkey; + struct ehca_mr_pginfo pginfo; + u64 hw_pgsize; + + /* check other parameters */ + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if (mr_access_flags & IB_ACCESS_MW_BIND) { + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) { + ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x " + "fmr_attr->max_maps=%x fmr_attr->page_shift=%x", + fmr_attr->max_pages, fmr_attr->max_maps, + fmr_attr->page_shift); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + + hw_pgsize = 1 << fmr_attr->page_shift; + if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) { + ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x", + fmr_attr->page_shift); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + + e_fmr = ehca_mr_new(); + if (!e_fmr) { + ib_fmr = ERR_PTR(-ENOMEM); + goto alloc_fmr_exit0; + } + e_fmr->flags |= EHCA_MR_FLAG_FMR; + + /* register MR on HCA */ + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.hwpage_size = hw_pgsize; + /* + * pginfo.num_hwpages==0, ie register_rpages() will not be called + * but deferred to map_phys_fmr() + */ + ret = ehca_reg_mr(shca, e_fmr, NULL, + fmr_attr->max_pages * (1 << fmr_attr->page_shift), + mr_access_flags, e_pd, &pginfo, + &tmp_lkey, &tmp_rkey, EHCA_REG_MR); + if (ret) { + ib_fmr = ERR_PTR(ret); + goto alloc_fmr_exit1; + } + + /* successful */ + e_fmr->hwpage_size = hw_pgsize; + e_fmr->fmr_page_size = 1 << fmr_attr->page_shift; + e_fmr->fmr_max_pages = fmr_attr->max_pages; + e_fmr->fmr_max_maps = fmr_attr->max_maps; + e_fmr->fmr_map_cnt = 0; + return &e_fmr->ib.ib_fmr; + +alloc_fmr_exit1: + ehca_mr_delete(e_fmr); +alloc_fmr_exit0: + return ib_fmr; +} /* end ehca_alloc_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, + int list_len, + u64 iova) +{ + int ret; + struct ehca_shca *shca = + container_of(fmr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); + struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); + struct ehca_mr_pginfo pginfo; + u32 tmp_lkey, tmp_rkey; + + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", + e_fmr, e_fmr->flags); + ret = -EINVAL; + goto map_phys_fmr_exit0; + } + ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len); + if (ret) + goto map_phys_fmr_exit0; + if (iova % e_fmr->fmr_page_size) { + /* only whole-numbered pages */ + ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x", + iova, e_fmr->fmr_page_size); + ret = -EINVAL; + goto map_phys_fmr_exit0; + } + if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) { + /* HCAD does not limit the maps, however trace this anyway */ + ehca_info(fmr->device, "map limit exceeded, fmr=%p " + "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x", + fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); + } + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_kpages = list_len; + pginfo.hwpage_size = e_fmr->hwpage_size; + pginfo.num_hwpages = + list_len * e_fmr->fmr_page_size / pginfo.hwpage_size; + pginfo.u.fmr.page_list = page_list; + pginfo.next_hwpage = + (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size; + pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size; + + ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova, + list_len * e_fmr->fmr_page_size, + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); + if (ret) + goto map_phys_fmr_exit0; + + /* successful reregistration */ + e_fmr->fmr_map_cnt++; + e_fmr->ib.ib_fmr.lkey = tmp_lkey; + e_fmr->ib.ib_fmr.rkey = tmp_rkey; + return 0; + +map_phys_fmr_exit0: + if (ret) + ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x " + "iova=%llx", ret, fmr, page_list, list_len, iova); + return ret; +} /* end ehca_map_phys_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_unmap_fmr(struct list_head *fmr_list) +{ + int ret = 0; + struct ib_fmr *ib_fmr; + struct ehca_shca *shca = NULL; + struct ehca_shca *prev_shca; + struct ehca_mr *e_fmr; + u32 num_fmr = 0; + u32 unmap_fmr_cnt = 0; + + /* check all FMR belong to same SHCA, and check internal flag */ + list_for_each_entry(ib_fmr, fmr_list, list) { + prev_shca = shca; + shca = container_of(ib_fmr->device, struct ehca_shca, + ib_device); + e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); + if ((shca != prev_shca) && prev_shca) { + ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p " + "prev_shca=%p e_fmr=%p", + shca, prev_shca, e_fmr); + ret = -EINVAL; + goto unmap_fmr_exit0; + } + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p " + "e_fmr->flags=%x", e_fmr, e_fmr->flags); + ret = -EINVAL; + goto unmap_fmr_exit0; + } + num_fmr++; + } + + /* loop over all FMRs to unmap */ + list_for_each_entry(ib_fmr, fmr_list, list) { + unmap_fmr_cnt++; + e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); + shca = container_of(ib_fmr->device, struct ehca_shca, + ib_device); + ret = ehca_unmap_one_fmr(shca, e_fmr); + if (ret) { + /* unmap failed, stop unmapping of rest of FMRs */ + ehca_err(&shca->ib_device, "unmap of one FMR failed, " + "stop rest, e_fmr=%p num_fmr=%x " + "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr, + unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey); + goto unmap_fmr_exit0; + } + } + +unmap_fmr_exit0: + if (ret) + ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x", + ret, fmr_list, num_fmr, unmap_fmr_cnt); + return ret; +} /* end ehca_unmap_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dealloc_fmr(struct ib_fmr *fmr) +{ + int ret; + u64 h_ret; + struct ehca_shca *shca = + container_of(fmr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); + + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", + e_fmr, e_fmr->flags); + ret = -EINVAL; + goto free_fmr_exit0; + } + + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p " + "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, fmr->lkey); + ret = ehca2ib_return_code(h_ret); + goto free_fmr_exit0; + } + /* successful deregistration */ + ehca_mr_delete(e_fmr); + return 0; + +free_fmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr); + return ret; +} /* end ehca_dealloc_fmr() */ + +/*----------------------------------------------------------------------*/ + +static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo); + +int ehca_reg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, /*OUT*/ + u32 *rkey, /*OUT*/ + enum ehca_reg_type reg_type) +{ + int ret; + u64 h_ret; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); + if (ehca_use_hp_mr == 1) + hipz_acl |= 0x00000001; + + h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, + (u64)iova_start, size, hipz_acl, + e_pd->fw_pd, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli " + "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle); + ret = ehca2ib_return_code(h_ret); + goto ehca_reg_mr_exit0; + } + + e_mr->ipz_mr_handle = hipzout.handle; + + if (reg_type == EHCA_REG_BUSMAP_MR) + ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo); + else if (reg_type == EHCA_REG_MR) + ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); + else + ret = -EINVAL; + + if (ret) + goto ehca_reg_mr_exit1; + + /* successful registration */ + e_mr->num_kpages = pginfo->num_kpages; + e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->hwpage_size = pginfo->hwpage_size; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; + +ehca_reg_mr_exit1: + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p " + "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x " + "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i", + h_ret, shca, e_mr, iova_start, size, acl, e_pd, + hipzout.lkey, pginfo, pginfo->num_kpages, + pginfo->num_hwpages, ret); + ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, " + "not recoverable"); + } +ehca_reg_mr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " + "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " + "num_kpages=%llx num_hwpages=%llx", + ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, + pginfo->num_kpages, pginfo->num_hwpages); + return ret; +} /* end ehca_reg_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo) +{ + int ret = 0; + u64 h_ret; + u32 rnum; + u64 rpage; + u32 i; + u64 *kpage; + + if (!pginfo->num_hwpages) /* in case of fmr */ + return 0; + + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + ret = -ENOMEM; + goto ehca_reg_mr_rpages_exit0; + } + + /* max MAX_RPAGES ehca mr pages per register call */ + for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) { + + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { + rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */ + if (rnum == 0) + rnum = MAX_RPAGES; /* last shot is full */ + } else + rnum = MAX_RPAGES; + + ret = ehca_set_pagebuf(pginfo, rnum, kpage); + if (ret) { + ehca_err(&shca->ib_device, "ehca_set_pagebuf " + "bad rc, ret=%i rnum=%x kpage=%p", + ret, rnum, kpage); + goto ehca_reg_mr_rpages_exit1; + } + + if (rnum > 1) { + rpage = virt_to_abs(kpage); + if (!rpage) { + ehca_err(&shca->ib_device, "kpage=%p i=%x", + kpage, i); + ret = -EFAULT; + goto ehca_reg_mr_rpages_exit1; + } + } else + rpage = *kpage; + + h_ret = hipz_h_register_rpage_mr( + shca->ipz_hca_handle, e_mr, + ehca_encode_hwpage_size(pginfo->hwpage_size), + 0, rpage, rnum); + + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { + /* + * check for 'registration complete'==H_SUCCESS + * and for 'page registered'==H_PAGE_REGISTERED + */ + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "last " + "hipz_reg_rpage_mr failed, h_ret=%lli " + "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx" + " lkey=%x", h_ret, e_mr, i, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey); + ret = ehca2ib_return_code(h_ret); + break; + } else + ret = 0; + } else if (h_ret != H_PAGE_REGISTERED) { + ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, " + "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx " + "mr_hndl=%llx", h_ret, e_mr, i, + e_mr->ib.ib_mr.lkey, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle); + ret = ehca2ib_return_code(h_ret); + break; + } else + ret = 0; + } /* end for(i) */ + + +ehca_reg_mr_rpages_exit1: + ehca_free_fw_ctrlblock(kpage); +ehca_reg_mr_rpages_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p " + "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr, + pginfo, pginfo->num_kpages, pginfo->num_hwpages); + return ret; +} /* end ehca_reg_mr_rpages() */ + +/*----------------------------------------------------------------------*/ + +inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + u32 acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret; + u64 h_ret; + u32 hipz_acl; + u64 *kpage; + u64 rpage; + struct ehca_mr_pginfo pginfo_save; + struct ehca_mr_hipzout_parms hipzout; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); + + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + ret = -ENOMEM; + goto ehca_rereg_mr_rereg1_exit0; + } + + pginfo_save = *pginfo; + ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage); + if (ret) { + ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " + "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx " + "kpage=%p", e_mr, pginfo, pginfo->type, + pginfo->num_kpages, pginfo->num_hwpages, kpage); + goto ehca_rereg_mr_rereg1_exit1; + } + rpage = virt_to_abs(kpage); + if (!rpage) { + ehca_err(&shca->ib_device, "kpage=%p", kpage); + ret = -EFAULT; + goto ehca_rereg_mr_rereg1_exit1; + } + h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr, + (u64)iova_start, size, hipz_acl, + e_pd->fw_pd, rpage, &hipzout); + if (h_ret != H_SUCCESS) { + /* + * reregistration unsuccessful, try it again with the 3 hCalls, + * e.g. this is required in case H_MR_CONDITION + * (MW bound or MR is shared) + */ + ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed " + "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr); + *pginfo = pginfo_save; + ret = -EAGAIN; + } else if ((u64 *)hipzout.vaddr != iova_start) { + ehca_err(&shca->ib_device, "PHYP changed iova_start in " + "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p " + "mr_handle=%llx lkey=%x lkey_out=%x", iova_start, + hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey, hipzout.lkey); + ret = -EFAULT; + } else { + /* + * successful reregistration + * note: start and start_out are identical for eServer HCAs + */ + e_mr->num_kpages = pginfo->num_kpages; + e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->hwpage_size = pginfo->hwpage_size; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + } + +ehca_rereg_mr_rereg1_exit1: + ehca_free_fw_ctrlblock(kpage); +ehca_rereg_mr_rereg1_exit0: + if ( ret && (ret != -EAGAIN) ) + ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x " + "pginfo=%p num_kpages=%llx num_hwpages=%llx", + ret, *lkey, *rkey, pginfo, pginfo->num_kpages, + pginfo->num_hwpages); + return ret; +} /* end ehca_rereg_mr_rereg1() */ + +/*----------------------------------------------------------------------*/ + +int ehca_rereg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey) +{ + int ret = 0; + u64 h_ret; + int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */ + int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ + + /* first determine reregistration hCall(s) */ + if ((pginfo->num_hwpages > MAX_RPAGES) || + (e_mr->num_hwpages > MAX_RPAGES) || + (pginfo->num_hwpages > e_mr->num_hwpages)) { + ehca_dbg(&shca->ib_device, "Rereg3 case, " + "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x", + pginfo->num_hwpages, e_mr->num_hwpages); + rereg_1_hcall = 0; + rereg_3_hcall = 1; + } + + if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */ + rereg_1_hcall = 0; + rereg_3_hcall = 1; + e_mr->flags &= ~EHCA_MR_FLAG_MAXMR; + ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p", + e_mr); + } + + if (rereg_1_hcall) { + ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size, + acl, e_pd, pginfo, lkey, rkey); + if (ret) { + if (ret == -EAGAIN) + rereg_3_hcall = 1; + else + goto ehca_rereg_mr_exit0; + } + } + + if (rereg_3_hcall) { + struct ehca_mr save_mr; + + /* first deregister old MR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx " + "mr->lkey=%x", + h_ret, e_mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_rereg_mr_exit0; + } + /* clean ehca_mr_t, without changing struct ib_mr and lock */ + save_mr = *e_mr; + ehca_mr_deletenew(e_mr); + + /* set some MR values */ + e_mr->flags = save_mr.flags; + e_mr->hwpage_size = save_mr.hwpage_size; + e_mr->fmr_page_size = save_mr.fmr_page_size; + e_mr->fmr_max_pages = save_mr.fmr_max_pages; + e_mr->fmr_max_maps = save_mr.fmr_max_maps; + e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, + e_pd, pginfo, lkey, rkey, EHCA_REG_MR); + if (ret) { + u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; + memcpy(&e_mr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + goto ehca_rereg_mr_exit0; + } + } + +ehca_rereg_mr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " + "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " + "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x " + "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, + acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey, + rereg_1_hcall, rereg_3_hcall); + return ret; +} /* end ehca_rereg_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_unmap_one_fmr(struct ehca_shca *shca, + struct ehca_mr *e_fmr) +{ + int ret = 0; + u64 h_ret; + struct ehca_pd *e_pd = + container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); + struct ehca_mr save_fmr; + u32 tmp_lkey, tmp_rkey; + struct ehca_mr_pginfo pginfo; + struct ehca_mr_hipzout_parms hipzout; + struct ehca_mr save_mr; + + if (e_fmr->fmr_max_pages <= MAX_RPAGES) { + /* + * note: after using rereg hcall with len=0, + * rereg hcall must be used again for registering pages + */ + h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, + 0, 0, e_pd->fw_pd, 0, &hipzout); + if (h_ret == H_SUCCESS) { + /* successful reregistration */ + e_fmr->start = NULL; + e_fmr->size = 0; + tmp_lkey = hipzout.lkey; + tmp_rkey = hipzout.rkey; + return 0; + } + /* + * should not happen, because length checked above, + * FMRs are not shared and no MW bound to FMRs + */ + ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " + "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx " + "mr_hndl=%llx lkey=%x lkey_out=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey, hipzout.lkey); + /* try free and rereg */ + } + + /* first free old FMR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx " + "lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_unmap_one_fmr_exit0; + } + /* clean ehca_mr_t, without changing lock */ + save_fmr = *e_fmr; + ehca_mr_deletenew(e_fmr); + + /* set some MR values */ + e_fmr->flags = save_fmr.flags; + e_fmr->hwpage_size = save_fmr.hwpage_size; + e_fmr->fmr_page_size = save_fmr.fmr_page_size; + e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; + e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; + e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; + e_fmr->acl = save_fmr.acl; + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + ret = ehca_reg_mr(shca, e_fmr, NULL, + (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, + &tmp_rkey, EHCA_REG_MR); + if (ret) { + u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; + memcpy(&e_fmr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + } + +ehca_unmap_one_fmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x " + "fmr_max_pages=%x", + ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages); + return ret; +} /* end ehca_unmap_one_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_smr(struct ehca_shca *shca, + struct ehca_mr *e_origmr, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret = 0; + u64 h_ret; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); + + h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, + (u64)iova_start, hipz_acl, e_pd->fw_pd, + &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " + "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x " + "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", + h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd, + shca->ipz_hca_handle.handle, + e_origmr->ipz_mr_handle.handle, + e_origmr->ib.ib_mr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_reg_smr_exit0; + } + /* successful registration */ + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->hwpage_size = e_origmr->hwpage_size; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; + e_newmr->ipz_mr_handle = hipzout.handle; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; + +ehca_reg_smr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p " + "e_newmr=%p iova_start=%p acl=%x e_pd=%p", + ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd); + return ret; +} /* end ehca_reg_smr() */ + +/*----------------------------------------------------------------------*/ +static inline void *ehca_calc_sectbase(int top, int dir, int idx) +{ + unsigned long ret = idx; + ret |= dir << EHCA_DIR_INDEX_SHIFT; + ret |= top << EHCA_TOP_INDEX_SHIFT; + return abs_to_virt(ret << SECTION_SIZE_BITS); +} + +#define ehca_bmap_valid(entry) \ + ((u64)entry != (u64)EHCA_INVAL_ADDR) + +static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage, + struct ehca_shca *shca, struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 h_ret = 0; + unsigned long page = 0; + u64 rpage = virt_to_abs(kpage); + int page_count; + + void *sectbase = ehca_calc_sectbase(top, dir, idx); + if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) { + ehca_err(&shca->ib_device, "reg_mr_section will probably fail:" + "hwpage_size does not fit to " + "section start address"); + } + page_count = EHCA_SECTSIZE / pginfo->hwpage_size; + + while (page < page_count) { + u64 rnum; + for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count); + rnum++) { + void *pg = sectbase + ((page++) * pginfo->hwpage_size); + kpage[rnum] = virt_to_abs(pg); + } + + h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr, + ehca_encode_hwpage_size(pginfo->hwpage_size), + 0, rpage, rnum); + + if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) { + ehca_err(&shca->ib_device, "register_rpage_mr failed"); + return h_ret; + } + } + return h_ret; +} + +static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage, + struct ehca_shca *shca, struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 hret = H_SUCCESS; + int idx; + + for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx])) + continue; + + hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr, + pginfo); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} + +static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca, + struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 hret = H_SUCCESS; + int dir; + + for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + continue; + + hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} + +/* register internal max-MR to internal SHCA */ +int ehca_reg_internal_maxmr( + struct ehca_shca *shca, + struct ehca_pd *e_pd, + struct ehca_mr **e_maxmr) /*OUT*/ +{ + int ret; + struct ehca_mr *e_mr; + u64 *iova_start; + u64 size_maxmr; + struct ehca_mr_pginfo pginfo; + struct ib_phys_buf ib_pbuf; + u32 num_kpages; + u32 num_hwpages; + u64 hw_pgsize; + + if (!ehca_bmap) { + ret = -EFAULT; + goto ehca_reg_internal_maxmr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(&shca->ib_device, "out of memory"); + ret = -ENOMEM; + goto ehca_reg_internal_maxmr_exit0; + } + e_mr->flags |= EHCA_MR_FLAG_MAXMR; + + /* register internal max-MR on HCA */ + size_maxmr = ehca_mr_len; + iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)); + ib_pbuf.addr = 0; + ib_pbuf.size = size_maxmr; + num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, + PAGE_SIZE); + hw_pgsize = ehca_get_max_hwpage_size(shca); + num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr, + hw_pgsize); + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.hwpage_size = hw_pgsize; + pginfo.u.phy.num_phys_buf = 1; + pginfo.u.phy.phys_buf_array = &ib_pbuf; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, + &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR); + if (ret) { + ehca_err(&shca->ib_device, "reg of internal max MR failed, " + "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x " + "num_hwpages=%x", e_mr, iova_start, size_maxmr, + num_kpages, num_hwpages); + goto ehca_reg_internal_maxmr_exit1; + } + + /* successful registration of all pages */ + e_mr->ib.ib_mr.device = e_pd->ib_pd.device; + e_mr->ib.ib_mr.pd = &e_pd->ib_pd; + e_mr->ib.ib_mr.uobject = NULL; + atomic_inc(&(e_pd->ib_pd.usecnt)); + atomic_set(&(e_mr->ib.ib_mr.usecnt), 0); + *e_maxmr = e_mr; + return 0; + +ehca_reg_internal_maxmr_exit1: + ehca_mr_delete(e_mr); +ehca_reg_internal_maxmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p", + ret, shca, e_pd, e_maxmr); + return ret; +} /* end ehca_reg_internal_maxmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_maxmr(struct ehca_shca *shca, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey) +{ + u64 h_ret; + struct ehca_mr *e_origmr = shca->maxmr; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); + + h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, + (u64)iova_start, hipz_acl, e_pd->fw_pd, + &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " + "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", + h_ret, e_origmr, shca->ipz_hca_handle.handle, + e_origmr->ipz_mr_handle.handle, + e_origmr->ib.ib_mr.lkey); + return ehca2ib_return_code(h_ret); + } + /* successful registration */ + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->hwpage_size = e_origmr->hwpage_size; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; + e_newmr->ipz_mr_handle = hipzout.handle; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; +} /* end ehca_reg_maxmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dereg_internal_maxmr(struct ehca_shca *shca) +{ + int ret; + struct ehca_mr *e_maxmr; + struct ib_pd *ib_pd; + + if (!shca->maxmr) { + ehca_err(&shca->ib_device, "bad call, shca=%p", shca); + ret = -EINVAL; + goto ehca_dereg_internal_maxmr_exit0; + } + + e_maxmr = shca->maxmr; + ib_pd = e_maxmr->ib.ib_mr.pd; + shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */ + + ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr); + if (ret) { + ehca_err(&shca->ib_device, "dereg internal max-MR failed, " + "ret=%i e_maxmr=%p shca=%p lkey=%x", + ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey); + shca->maxmr = e_maxmr; + goto ehca_dereg_internal_maxmr_exit0; + } + + atomic_dec(&ib_pd->usecnt); + +ehca_dereg_internal_maxmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p", + ret, shca, shca->maxmr); + return ret; +} /* end ehca_dereg_internal_maxmr() */ + +/*----------------------------------------------------------------------*/ + +/* + * check physical buffer array of MR verbs for validness and + * calculates MR size + */ +int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + u64 *iova_start, + u64 *size) +{ + struct ib_phys_buf *pbuf = phys_buf_array; + u64 size_count = 0; + u32 i; + + if (num_phys_buf == 0) { + ehca_gen_err("bad phys buf array len, num_phys_buf=0"); + return -EINVAL; + } + /* check first buffer */ + if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) { + ehca_gen_err("iova_start/addr mismatch, iova_start=%p " + "pbuf->addr=%llx pbuf->size=%llx", + iova_start, pbuf->addr, pbuf->size); + return -EINVAL; + } + if (((pbuf->addr + pbuf->size) % PAGE_SIZE) && + (num_phys_buf > 1)) { + ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx " + "pbuf->size=%llx", pbuf->addr, pbuf->size); + return -EINVAL; + } + + for (i = 0; i < num_phys_buf; i++) { + if ((i > 0) && (pbuf->addr % PAGE_SIZE)) { + ehca_gen_err("bad address, i=%x pbuf->addr=%llx " + "pbuf->size=%llx", + i, pbuf->addr, pbuf->size); + return -EINVAL; + } + if (((i > 0) && /* not 1st */ + (i < (num_phys_buf - 1)) && /* not last */ + (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) { + ehca_gen_err("bad size, i=%x pbuf->size=%llx", + i, pbuf->size); + return -EINVAL; + } + size_count += pbuf->size; + pbuf++; + } + + *size = size_count; + return 0; +} /* end ehca_mr_chk_buf_and_calc_size() */ + +/*----------------------------------------------------------------------*/ + +/* check page list of map FMR verb for validness */ +int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, + u64 *page_list, + int list_len) +{ + u32 i; + u64 *page; + + if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) { + ehca_gen_err("bad list_len, list_len=%x " + "e_fmr->fmr_max_pages=%x fmr=%p", + list_len, e_fmr->fmr_max_pages, e_fmr); + return -EINVAL; + } + + /* each page must be aligned */ + page = page_list; + for (i = 0; i < list_len; i++) { + if (*page % e_fmr->fmr_page_size) { + ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p " + "fmr_page_size=%x", i, *page, page, e_fmr, + e_fmr->fmr_page_size); + return -EINVAL; + } + page++; + } + + return 0; +} /* end ehca_fmr_check_page_list() */ + +/*----------------------------------------------------------------------*/ + +/* PAGE_SIZE >= pginfo->hwpage_size */ +static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + struct ib_umem_chunk *prev_chunk; + struct ib_umem_chunk *chunk; + u64 pgaddr; + u32 i = 0; + u32 j = 0; + int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size; + + /* loop over desired chunk entries */ + chunk = pginfo->u.usr.next_chunk; + prev_chunk = pginfo->u.usr.next_chunk; + list_for_each_entry_continue( + chunk, (&(pginfo->u.usr.region->chunk_list)), list) { + for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { + pgaddr = page_to_pfn(sg_page(&chunk->page_list[i])) + << PAGE_SHIFT ; + *kpage = phys_to_abs(pgaddr + + (pginfo->next_hwpage * + pginfo->hwpage_size)); + if ( !(*kpage) ) { + ehca_gen_err("pgaddr=%llx " + "chunk->page_list[i]=%llx " + "i=%x next_hwpage=%llx", + pgaddr, (u64)sg_dma_address( + &chunk->page_list[i]), + i, pginfo->next_hwpage); + return -EFAULT; + } + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + kpage++; + if (pginfo->next_hwpage % hwpages_per_kpage == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.usr.next_nmap)++; + pginfo->next_hwpage = 0; + i++; + } + j++; + if (j >= number) break; + } + if ((pginfo->u.usr.next_nmap >= chunk->nmap) && + (j >= number)) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + break; + } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + } else if (j >= number) + break; + else + prev_chunk = chunk; + } + pginfo->u.usr.next_chunk = + list_prepare_entry(prev_chunk, + (&(pginfo->u.usr.region->chunk_list)), + list); + return ret; +} + +/* + * check given pages for contiguous layout + * last page addr is returned in prev_pgaddr for further check + */ +static int ehca_check_kpages_per_ate(struct scatterlist *page_list, + int start_idx, int end_idx, + u64 *prev_pgaddr) +{ + int t; + for (t = start_idx; t <= end_idx; t++) { + u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT; + if (ehca_debug_level >= 3) + ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, + *(u64 *)abs_to_virt(phys_to_abs(pgaddr))); + if (pgaddr - PAGE_SIZE != *prev_pgaddr) { + ehca_gen_err("uncontiguous page found pgaddr=%llx " + "prev_pgaddr=%llx page_list_i=%x", + pgaddr, *prev_pgaddr, t); + return -EINVAL; + } + *prev_pgaddr = pgaddr; + } + return 0; +} + +/* PAGE_SIZE < pginfo->hwpage_size */ +static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + struct ib_umem_chunk *prev_chunk; + struct ib_umem_chunk *chunk; + u64 pgaddr, prev_pgaddr; + u32 i = 0; + u32 j = 0; + int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; + int nr_kpages = kpages_per_hwpage; + + /* loop over desired chunk entries */ + chunk = pginfo->u.usr.next_chunk; + prev_chunk = pginfo->u.usr.next_chunk; + list_for_each_entry_continue( + chunk, (&(pginfo->u.usr.region->chunk_list)), list) { + for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { + if (nr_kpages == kpages_per_hwpage) { + pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i])) + << PAGE_SHIFT ); + *kpage = phys_to_abs(pgaddr); + if ( !(*kpage) ) { + ehca_gen_err("pgaddr=%llx i=%x", + pgaddr, i); + ret = -EFAULT; + return ret; + } + /* + * The first page in a hwpage must be aligned; + * the first MR page is exempt from this rule. + */ + if (pgaddr & (pginfo->hwpage_size - 1)) { + if (pginfo->hwpage_cnt) { + ehca_gen_err( + "invalid alignment " + "pgaddr=%llx i=%x " + "mr_pgsize=%llx", + pgaddr, i, + pginfo->hwpage_size); + ret = -EFAULT; + return ret; + } + /* first MR page */ + pginfo->kpage_cnt = + (pgaddr & + (pginfo->hwpage_size - 1)) >> + PAGE_SHIFT; + nr_kpages -= pginfo->kpage_cnt; + *kpage = phys_to_abs( + pgaddr & + ~(pginfo->hwpage_size - 1)); + } + if (ehca_debug_level >= 3) { + u64 val = *(u64 *)abs_to_virt( + phys_to_abs(pgaddr)); + ehca_gen_dbg("kpage=%llx chunk_page=%llx " + "value=%016llx", + *kpage, pgaddr, val); + } + prev_pgaddr = pgaddr; + i++; + pginfo->kpage_cnt++; + pginfo->u.usr.next_nmap++; + nr_kpages--; + if (!nr_kpages) + goto next_kpage; + continue; + } + if (i + nr_kpages > chunk->nmap) { + ret = ehca_check_kpages_per_ate( + chunk->page_list, i, + chunk->nmap - 1, &prev_pgaddr); + if (ret) return ret; + pginfo->kpage_cnt += chunk->nmap - i; + pginfo->u.usr.next_nmap += chunk->nmap - i; + nr_kpages -= chunk->nmap - i; + break; + } + + ret = ehca_check_kpages_per_ate(chunk->page_list, i, + i + nr_kpages - 1, + &prev_pgaddr); + if (ret) return ret; + i += nr_kpages; + pginfo->kpage_cnt += nr_kpages; + pginfo->u.usr.next_nmap += nr_kpages; +next_kpage: + nr_kpages = kpages_per_hwpage; + (pginfo->hwpage_cnt)++; + kpage++; + j++; + if (j >= number) break; + } + if ((pginfo->u.usr.next_nmap >= chunk->nmap) && + (j >= number)) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + break; + } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + } else if (j >= number) + break; + else + prev_chunk = chunk; + } + pginfo->u.usr.next_chunk = + list_prepare_entry(prev_chunk, + (&(pginfo->u.usr.region->chunk_list)), + list); + return ret; +} + +static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, + u32 number, u64 *kpage) +{ + int ret = 0; + struct ib_phys_buf *pbuf; + u64 num_hw, offs_hw; + u32 i = 0; + + /* loop over desired phys_buf_array entries */ + while (i < number) { + pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf; + num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) + + pbuf->size, pginfo->hwpage_size); + offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) / + pginfo->hwpage_size; + while (pginfo->next_hwpage < offs_hw + num_hw) { + /* sanity check */ + if ((pginfo->kpage_cnt >= pginfo->num_kpages) || + (pginfo->hwpage_cnt >= pginfo->num_hwpages)) { + ehca_gen_err("kpage_cnt >= num_kpages, " + "kpage_cnt=%llx num_kpages=%llx " + "hwpage_cnt=%llx " + "num_hwpages=%llx i=%x", + pginfo->kpage_cnt, + pginfo->num_kpages, + pginfo->hwpage_cnt, + pginfo->num_hwpages, i); + return -EFAULT; + } + *kpage = phys_to_abs( + (pbuf->addr & ~(pginfo->hwpage_size - 1)) + + (pginfo->next_hwpage * pginfo->hwpage_size)); + if ( !(*kpage) && pbuf->addr ) { + ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx " + "next_hwpage=%llx", pbuf->addr, + pbuf->size, pginfo->next_hwpage); + return -EFAULT; + } + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + if (PAGE_SIZE >= pginfo->hwpage_size) { + if (pginfo->next_hwpage % + (PAGE_SIZE / pginfo->hwpage_size) == 0) + (pginfo->kpage_cnt)++; + } else + pginfo->kpage_cnt += pginfo->hwpage_size / + PAGE_SIZE; + kpage++; + i++; + if (i >= number) break; + } + if (pginfo->next_hwpage >= offs_hw + num_hw) { + (pginfo->u.phy.next_buf)++; + pginfo->next_hwpage = 0; + } + } + return ret; +} + +static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, + u32 number, u64 *kpage) +{ + int ret = 0; + u64 *fmrlist; + u32 i; + + /* loop over desired page_list entries */ + fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; + for (i = 0; i < number; i++) { + *kpage = phys_to_abs((*fmrlist & ~(pginfo->hwpage_size - 1)) + + pginfo->next_hwpage * pginfo->hwpage_size); + if ( !(*kpage) ) { + ehca_gen_err("*fmrlist=%llx fmrlist=%p " + "next_listelem=%llx next_hwpage=%llx", + *fmrlist, fmrlist, + pginfo->u.fmr.next_listelem, + pginfo->next_hwpage); + return -EFAULT; + } + (pginfo->hwpage_cnt)++; + if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) { + if (pginfo->next_hwpage % + (pginfo->u.fmr.fmr_pgsize / + pginfo->hwpage_size) == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.fmr.next_listelem)++; + fmrlist++; + pginfo->next_hwpage = 0; + } else + (pginfo->next_hwpage)++; + } else { + unsigned int cnt_per_hwpage = pginfo->hwpage_size / + pginfo->u.fmr.fmr_pgsize; + unsigned int j; + u64 prev = *kpage; + /* check if adrs are contiguous */ + for (j = 1; j < cnt_per_hwpage; j++) { + u64 p = phys_to_abs(fmrlist[j] & + ~(pginfo->hwpage_size - 1)); + if (prev + pginfo->u.fmr.fmr_pgsize != p) { + ehca_gen_err("uncontiguous fmr pages " + "found prev=%llx p=%llx " + "idx=%x", prev, p, i + j); + return -EINVAL; + } + prev = p; + } + pginfo->kpage_cnt += cnt_per_hwpage; + pginfo->u.fmr.next_listelem += cnt_per_hwpage; + fmrlist += cnt_per_hwpage; + } + kpage++; + } + return ret; +} + +/* setup page buffer from page info */ +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret; + + switch (pginfo->type) { + case EHCA_MR_PGI_PHYS: + ret = ehca_set_pagebuf_phys(pginfo, number, kpage); + break; + case EHCA_MR_PGI_USER: + ret = PAGE_SIZE >= pginfo->hwpage_size ? + ehca_set_pagebuf_user1(pginfo, number, kpage) : + ehca_set_pagebuf_user2(pginfo, number, kpage); + break; + case EHCA_MR_PGI_FMR: + ret = ehca_set_pagebuf_fmr(pginfo, number, kpage); + break; + default: + ehca_gen_err("bad pginfo->type=%x", pginfo->type); + ret = -EFAULT; + break; + } + return ret; +} /* end ehca_set_pagebuf() */ + +/*----------------------------------------------------------------------*/ + +/* + * check MR if it is a max-MR, i.e. uses whole memory + * in case it's a max-MR 1 is returned, else 0 + */ +int ehca_mr_is_maxmr(u64 size, + u64 *iova_start) +{ + /* a MR is treated as max-MR only if it fits following: */ + if ((size == ehca_mr_len) && + (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) { + ehca_gen_dbg("this is a max-MR"); + return 1; + } else + return 0; +} /* end ehca_mr_is_maxmr() */ + +/*----------------------------------------------------------------------*/ + +/* map access control for MR/MW. This routine is used for MR and MW. */ +void ehca_mrmw_map_acl(int ib_acl, + u32 *hipz_acl) +{ + *hipz_acl = 0; + if (ib_acl & IB_ACCESS_REMOTE_READ) + *hipz_acl |= HIPZ_ACCESSCTRL_R_READ; + if (ib_acl & IB_ACCESS_REMOTE_WRITE) + *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE; + if (ib_acl & IB_ACCESS_REMOTE_ATOMIC) + *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC; + if (ib_acl & IB_ACCESS_LOCAL_WRITE) + *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE; + if (ib_acl & IB_ACCESS_MW_BIND) + *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND; +} /* end ehca_mrmw_map_acl() */ + +/*----------------------------------------------------------------------*/ + +/* sets page size in hipz access control for MR/MW. */ +void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/ +{ + *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24); +} /* end ehca_mrmw_set_pgsize_hipz_acl() */ + +/*----------------------------------------------------------------------*/ + +/* + * reverse map access control for MR/MW. + * This routine is used for MR and MW. + */ +void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, + int *ib_acl) /*OUT*/ +{ + *ib_acl = 0; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ) + *ib_acl |= IB_ACCESS_REMOTE_READ; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE) + *ib_acl |= IB_ACCESS_REMOTE_WRITE; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC) + *ib_acl |= IB_ACCESS_REMOTE_ATOMIC; + if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE) + *ib_acl |= IB_ACCESS_LOCAL_WRITE; + if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND) + *ib_acl |= IB_ACCESS_MW_BIND; +} /* end ehca_mrmw_reverse_map_acl() */ + + +/*----------------------------------------------------------------------*/ + +/* + * MR destructor and constructor + * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, + * except struct ib_mr and spinlock + */ +void ehca_mr_deletenew(struct ehca_mr *mr) +{ + mr->flags = 0; + mr->num_kpages = 0; + mr->num_hwpages = 0; + mr->acl = 0; + mr->start = NULL; + mr->fmr_page_size = 0; + mr->fmr_max_pages = 0; + mr->fmr_max_maps = 0; + mr->fmr_map_cnt = 0; + memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); + memset(&mr->galpas, 0, sizeof(mr->galpas)); +} /* end ehca_mr_deletenew() */ + +int ehca_init_mrmw_cache(void) +{ + mr_cache = kmem_cache_create("ehca_cache_mr", + sizeof(struct ehca_mr), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!mr_cache) + return -ENOMEM; + mw_cache = kmem_cache_create("ehca_cache_mw", + sizeof(struct ehca_mw), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!mw_cache) { + kmem_cache_destroy(mr_cache); + mr_cache = NULL; + return -ENOMEM; + } + return 0; +} + +void ehca_cleanup_mrmw_cache(void) +{ + if (mr_cache) + kmem_cache_destroy(mr_cache); + if (mw_cache) + kmem_cache_destroy(mw_cache); +} + +static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap, + int dir) +{ + if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) { + ehca_top_bmap->dir[dir] = + kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL); + if (!ehca_top_bmap->dir[dir]) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE); + } + return 0; +} + +static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir) +{ + if (!ehca_bmap_valid(ehca_bmap->top[top])) { + ehca_bmap->top[top] = + kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL); + if (!ehca_bmap->top[top]) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE); + } + return ehca_init_top_bmap(ehca_bmap->top[top], dir); +} + +static inline int ehca_calc_index(unsigned long i, unsigned long s) +{ + return (i >> s) & EHCA_INDEX_MASK; +} + +void ehca_destroy_busmap(void) +{ + int top, dir; + + if (!ehca_bmap) + return; + + for (top = 0; top < EHCA_MAP_ENTRIES; top++) { + if (!ehca_bmap_valid(ehca_bmap->top[top])) + continue; + for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + continue; + + kfree(ehca_bmap->top[top]->dir[dir]); + } + + kfree(ehca_bmap->top[top]); + } + + kfree(ehca_bmap); + ehca_bmap = NULL; +} + +static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages) +{ + unsigned long i, start_section, end_section; + int top, dir, idx; + + if (!nr_pages) + return 0; + + if (!ehca_bmap) { + ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL); + if (!ehca_bmap) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE); + } + + start_section = phys_to_abs(pfn * PAGE_SIZE) / EHCA_SECTSIZE; + end_section = phys_to_abs((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE; + for (i = start_section; i < end_section; i++) { + int ret; + top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT); + dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT); + idx = i & EHCA_INDEX_MASK; + + ret = ehca_init_bmap(ehca_bmap, top, dir); + if (ret) { + ehca_destroy_busmap(); + return ret; + } + ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len; + ehca_mr_len += EHCA_SECTSIZE; + } + return 0; +} + +static int ehca_is_hugepage(unsigned long pfn) +{ + int page_order; + + if (pfn & EHCA_HUGEPAGE_PFN_MASK) + return 0; + + page_order = compound_order(pfn_to_page(pfn)); + if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT) + return 0; + + return 1; +} + +static int ehca_create_busmap_callback(unsigned long initial_pfn, + unsigned long total_nr_pages, void *arg) +{ + int ret; + unsigned long pfn, start_pfn, end_pfn, nr_pages; + + if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE) + return ehca_update_busmap(initial_pfn, total_nr_pages); + + /* Given chunk is >= 16GB -> check for hugepages */ + start_pfn = initial_pfn; + end_pfn = initial_pfn + total_nr_pages; + pfn = start_pfn; + + while (pfn < end_pfn) { + if (ehca_is_hugepage(pfn)) { + /* Add mem found in front of the hugepage */ + nr_pages = pfn - start_pfn; + ret = ehca_update_busmap(start_pfn, nr_pages); + if (ret) + return ret; + /* Skip the hugepage */ + pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE); + start_pfn = pfn; + } else + pfn += (EHCA_SECTSIZE / PAGE_SIZE); + } + + /* Add mem found behind the hugepage(s) */ + nr_pages = pfn - start_pfn; + return ehca_update_busmap(start_pfn, nr_pages); +} + +int ehca_create_busmap(void) +{ + int ret; + + ehca_mr_len = 0; + ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL, + ehca_create_busmap_callback); + return ret; +} + +static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo) +{ + int top; + u64 hret, *kpage; + + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + return -ENOMEM; + } + for (top = 0; top < EHCA_MAP_ENTRIES; top++) { + if (!ehca_bmap_valid(ehca_bmap->top[top])) + continue; + hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo); + if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS)) + break; + } + + ehca_free_fw_ctrlblock(kpage); + + if (hret == H_SUCCESS) + return 0; /* Everything is fine */ + else { + ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, " + "h_ret=%lli e_mr=%p top=%x lkey=%x " + "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top, + e_mr->ib.ib_mr.lkey, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle); + return ehca2ib_return_code(hret); + } +} + +static u64 ehca_map_vaddr(void *caddr) +{ + int top, dir, idx; + unsigned long abs_addr, offset; + u64 entry; + + if (!ehca_bmap) + return EHCA_INVAL_ADDR; + + abs_addr = virt_to_abs(caddr); + top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT); + if (!ehca_bmap_valid(ehca_bmap->top[top])) + return EHCA_INVAL_ADDR; + + dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT); + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + return EHCA_INVAL_ADDR; + + idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT); + + entry = ehca_bmap->top[top]->dir[dir]->ent[idx]; + if (ehca_bmap_valid(entry)) { + offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1); + return entry | offset; + } else + return EHCA_INVAL_ADDR; +} + +static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + return dma_addr == EHCA_INVAL_ADDR; +} + +static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr, + size_t size, enum dma_data_direction direction) +{ + if (cpu_addr) + return ehca_map_vaddr(cpu_addr); + else + return EHCA_INVAL_ADDR; +} + +static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + u64 addr; + + if (offset + size > PAGE_SIZE) + return EHCA_INVAL_ADDR; + + addr = ehca_map_vaddr(page_address(page)); + if (!ehca_dma_mapping_error(dev, addr)) + addr += offset; + + return addr; +} + +static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) { + u64 addr; + addr = ehca_map_vaddr(sg_virt(sg)); + if (ehca_dma_mapping_error(dev, addr)) + return 0; + + sg->dma_address = addr; + sg->dma_length = sg->length; + } + return nents; +} + +static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static u64 ehca_dma_address(struct ib_device *dev, struct scatterlist *sg) +{ + return sg->dma_address; +} + +static unsigned int ehca_dma_len(struct ib_device *dev, struct scatterlist *sg) +{ + return sg->length; +} + +static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); +} + +static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ + dma_sync_single_for_device(dev->dma_device, addr, size, dir); +} + +static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size, + u64 *dma_handle, gfp_t flag) +{ + struct page *p; + void *addr = NULL; + u64 dma_addr; + + p = alloc_pages(flag, get_order(size)); + if (p) { + addr = page_address(p); + dma_addr = ehca_map_vaddr(addr); + if (ehca_dma_mapping_error(dev, dma_addr)) { + free_pages((unsigned long)addr, get_order(size)); + return NULL; + } + if (dma_handle) + *dma_handle = dma_addr; + return addr; + } + return NULL; +} + +static void ehca_dma_free_coherent(struct ib_device *dev, size_t size, + void *cpu_addr, u64 dma_handle) +{ + if (cpu_addr && size) + free_pages((unsigned long)cpu_addr, get_order(size)); +} + + +struct ib_dma_mapping_ops ehca_dma_mapping_ops = { + .mapping_error = ehca_dma_mapping_error, + .map_single = ehca_dma_map_single, + .unmap_single = ehca_dma_unmap_single, + .map_page = ehca_dma_map_page, + .unmap_page = ehca_dma_unmap_page, + .map_sg = ehca_dma_map_sg, + .unmap_sg = ehca_dma_unmap_sg, + .dma_address = ehca_dma_address, + .dma_len = ehca_dma_len, + .sync_single_for_cpu = ehca_dma_sync_single_for_cpu, + .sync_single_for_device = ehca_dma_sync_single_for_device, + .alloc_coherent = ehca_dma_alloc_coherent, + .free_coherent = ehca_dma_free_coherent, +}; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h new file mode 100644 index 00000000..50d8b513 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h @@ -0,0 +1,132 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * MR/MW declarations and inline functions + * + * Authors: Dietmar Decker + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _EHCA_MRMW_H_ +#define _EHCA_MRMW_H_ + +enum ehca_reg_type { + EHCA_REG_MR, + EHCA_REG_BUSMAP_MR +}; + +int ehca_reg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey, + enum ehca_reg_type reg_type); + +int ehca_reg_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo); + +int ehca_rereg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int mr_access_flags, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey); + +int ehca_unmap_one_fmr(struct ehca_shca *shca, + struct ehca_mr *e_fmr); + +int ehca_reg_smr(struct ehca_shca *shca, + struct ehca_mr *e_origmr, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey); + +int ehca_reg_internal_maxmr(struct ehca_shca *shca, + struct ehca_pd *e_pd, + struct ehca_mr **maxmr); + +int ehca_reg_maxmr(struct ehca_shca *shca, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey); + +int ehca_dereg_internal_maxmr(struct ehca_shca *shca); + +int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + u64 *iova_start, + u64 *size); + +int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, + u64 *page_list, + int list_len); + +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage); + +int ehca_mr_is_maxmr(u64 size, + u64 *iova_start); + +void ehca_mrmw_map_acl(int ib_acl, + u32 *hipz_acl); + +void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl); + +void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, + int *ib_acl); + +void ehca_mr_deletenew(struct ehca_mr *mr); + +int ehca_create_busmap(void); + +void ehca_destroy_busmap(void); + +extern struct ib_dma_mapping_ops ehca_dma_mapping_ops; +#endif /*_EHCA_MRMW_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c new file mode 100644 index 00000000..351577a6 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_pd.c @@ -0,0 +1,124 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * PD functions + * + * Authors: Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_tools.h" +#include "ehca_iverbs.h" + +static struct kmem_cache *pd_cache; + +struct ib_pd *ehca_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, struct ib_udata *udata) +{ + struct ehca_pd *pd; + int i; + + pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL); + if (!pd) { + ehca_err(device, "device=%p context=%p out of memory", + device, context); + return ERR_PTR(-ENOMEM); + } + + for (i = 0; i < 2; i++) { + INIT_LIST_HEAD(&pd->free[i]); + INIT_LIST_HEAD(&pd->full[i]); + } + mutex_init(&pd->lock); + + /* + * Kernel PD: when device = -1, 0 + * User PD: when context != -1 + */ + if (!context) { + /* + * Kernel PDs after init reuses always + * the one created in ehca_shca_reopen() + */ + struct ehca_shca *shca = container_of(device, struct ehca_shca, + ib_device); + pd->fw_pd.value = shca->pd->fw_pd.value; + } else + pd->fw_pd.value = (u64)pd; + + return &pd->ib_pd; +} + +int ehca_dealloc_pd(struct ib_pd *pd) +{ + struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); + int i, leftovers = 0; + struct ipz_small_queue_page *page, *tmp; + + for (i = 0; i < 2; i++) { + list_splice(&my_pd->full[i], &my_pd->free[i]); + list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) { + leftovers = 1; + free_page(page->page); + kmem_cache_free(small_qp_cache, page); + } + } + + if (leftovers) + ehca_warn(pd->device, + "Some small queue pages were not freed"); + + kmem_cache_free(pd_cache, my_pd); + + return 0; +} + +int ehca_init_pd_cache(void) +{ + pd_cache = kmem_cache_create("ehca_cache_pd", + sizeof(struct ehca_pd), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!pd_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_pd_cache(void) +{ + if (pd_cache) + kmem_cache_destroy(pd_cache); +} diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h new file mode 100644 index 00000000..90c4efa6 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_qes.h @@ -0,0 +1,260 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Hardware request structures + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef _EHCA_QES_H_ +#define _EHCA_QES_H_ + +#include "ehca_tools.h" + +/* virtual scatter gather entry to specify remote addresses with length */ +struct ehca_vsgentry { + u64 vaddr; + u32 lkey; + u32 length; +}; + +#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7) +#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3) +#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12) +#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31) +#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47) +#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55) +#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63) + +/* + * Unreliable Datagram Address Vector Format + * see IBTA Vol1 chapter 8.3 Global Routing Header + */ +struct ehca_ud_av { + u8 sl; + u8 lnh; + u16 dlid; + u8 reserved1; + u8 reserved2; + u8 reserved3; + u8 slid_path_bits; + u8 reserved4; + u8 ipd; + u8 reserved5; + u8 pmtu; + u32 reserved6; + u64 reserved7; + union { + struct { + u64 word_0; /* always set to 6 */ + /*should be 0x1B for IB transport */ + u64 word_1; + u64 word_2; + u64 word_3; + u64 word_4; + } grh; + struct { + u32 wd_0; + u32 wd_1; + /* DWord_1 --> SGID */ + + u32 sgid_wd3; + u32 sgid_wd2; + + u32 sgid_wd1; + u32 sgid_wd0; + /* DWord_3 --> DGID */ + + u32 dgid_wd3; + u32 dgid_wd2; + + u32 dgid_wd1; + u32 dgid_wd0; + } grh_l; + }; +}; + +/* maximum number of sg entries allowed in a WQE */ +#define MAX_WQE_SG_ENTRIES 252 + +#define WQE_OPTYPE_SEND 0x80 +#define WQE_OPTYPE_RDMAREAD 0x40 +#define WQE_OPTYPE_RDMAWRITE 0x20 +#define WQE_OPTYPE_CMPSWAP 0x10 +#define WQE_OPTYPE_FETCHADD 0x08 +#define WQE_OPTYPE_BIND 0x04 + +#define WQE_WRFLAG_REQ_SIGNAL_COM 0x80 +#define WQE_WRFLAG_FENCE 0x40 +#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20 +#define WQE_WRFLAG_SOLIC_EVENT 0x10 + +#define WQEF_CACHE_HINT 0x80 +#define WQEF_CACHE_HINT_RD_WR 0x40 +#define WQEF_TIMED_WQE 0x20 +#define WQEF_PURGE 0x08 +#define WQEF_HIGH_NIBBLE 0xF0 + +#define MW_BIND_ACCESSCTRL_R_WRITE 0x40 +#define MW_BIND_ACCESSCTRL_R_READ 0x20 +#define MW_BIND_ACCESSCTRL_R_ATOMIC 0x10 + +struct ehca_wqe { + u64 work_request_id; + u8 optype; + u8 wr_flag; + u16 pkeyi; + u8 wqef; + u8 nr_of_data_seg; + u16 wqe_provided_slid; + u32 destination_qp_number; + u32 resync_psn_sqp; + u32 local_ee_context_qkey; + u32 immediate_data; + union { + struct { + u64 remote_virtual_address; + u32 rkey; + u32 reserved; + u64 atomic_1st_op_dma_len; + u64 atomic_2nd_op; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + + } nud; + struct { + u64 ehca_ud_av_ptr; + u64 reserved1; + u64 reserved2; + u64 reserved3; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + } ud_avp; + struct { + struct ehca_ud_av ud_av; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES - + 2]; + } ud_av; + struct { + u64 reserved0; + u64 reserved1; + u64 reserved2; + u64 reserved3; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + } all_rcv; + + struct { + u64 reserved; + u32 rkey; + u32 old_rkey; + u64 reserved1; + u64 reserved2; + u64 virtual_address; + u32 reserved3; + u32 length; + u32 reserved4; + u16 reserved5; + u8 reserved6; + u8 lr_ctl; + u32 lkey; + u32 reserved7; + u64 reserved8; + u64 reserved9; + u64 reserved10; + u64 reserved11; + } bind; + struct { + u64 reserved12; + u64 reserved13; + u32 size; + u32 start; + } inline_data; + } u; + +}; + +#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0) +#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1) +#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2) +#define WC_SE_BIT EHCA_BMASK_IBM(3, 3) +#define WC_STATUS_ERROR_BIT 0x80000000 +#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 +#define WC_STATUS_PURGE_BIT 0x10 +#define WC_SEND_RECEIVE_BIT 0x80 + +struct ehca_cqe { + u64 work_request_id; + u8 optype; + u8 w_completion_flags; + u16 reserved1; + u32 nr_bytes_transferred; + u32 immediate_data; + u32 local_qp_number; + u8 freed_resource_count; + u8 service_level; + u16 wqe_count; + u32 qp_token; + u32 qkey_ee_token; + u32 remote_qp_number; + u16 dlid; + u16 rlid; + u16 reserved2; + u16 pkey_index; + u32 cqe_timestamp; + u32 wqe_timestamp; + u8 wqe_timestamp_valid; + u8 reserved3; + u8 reserved4; + u8 cqe_flags; + u32 status; +}; + +struct ehca_eqe { + u64 entry; +}; + +struct ehca_mrte { + u64 starting_va; + u64 length; /* length of memory region in bytes*/ + u32 pd; + u8 key_instance; + u8 pagesize; + u8 mr_control; + u8 local_remote_access_ctrl; + u8 reserved[0x20 - 0x18]; + u64 at_pointer[4]; +}; +#endif /*_EHCA_QES_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c new file mode 100644 index 00000000..964f8552 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -0,0 +1,2261 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * QP functions + * + * Authors: Joachim Fenkes + * Stefan Roscher + * Waleri Fomin + * Hoang-Nam Nguyen + * Reinhard Ernst + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +static struct kmem_cache *qp_cache; + +/* + * attributes not supported by query qp + */ +#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \ + IB_QP_EN_SQD_ASYNC_NOTIFY) + +/* + * ehca (internal) qp state values + */ +enum ehca_qp_state { + EHCA_QPS_RESET = 1, + EHCA_QPS_INIT = 2, + EHCA_QPS_RTR = 3, + EHCA_QPS_RTS = 5, + EHCA_QPS_SQD = 6, + EHCA_QPS_SQE = 8, + EHCA_QPS_ERR = 128 +}; + +/* + * qp state transitions as defined by IB Arch Rel 1.1 page 431 + */ +enum ib_qp_statetrans { + IB_QPST_ANY2RESET, + IB_QPST_ANY2ERR, + IB_QPST_RESET2INIT, + IB_QPST_INIT2RTR, + IB_QPST_INIT2INIT, + IB_QPST_RTR2RTS, + IB_QPST_RTS2SQD, + IB_QPST_RTS2RTS, + IB_QPST_SQD2RTS, + IB_QPST_SQE2RTS, + IB_QPST_SQD2SQD, + IB_QPST_MAX /* nr of transitions, this must be last!!! */ +}; + +/* + * ib2ehca_qp_state maps IB to ehca qp_state + * returns ehca qp state corresponding to given ib qp state + */ +static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state) +{ + switch (ib_qp_state) { + case IB_QPS_RESET: + return EHCA_QPS_RESET; + case IB_QPS_INIT: + return EHCA_QPS_INIT; + case IB_QPS_RTR: + return EHCA_QPS_RTR; + case IB_QPS_RTS: + return EHCA_QPS_RTS; + case IB_QPS_SQD: + return EHCA_QPS_SQD; + case IB_QPS_SQE: + return EHCA_QPS_SQE; + case IB_QPS_ERR: + return EHCA_QPS_ERR; + default: + ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state); + return -EINVAL; + } +} + +/* + * ehca2ib_qp_state maps ehca to IB qp_state + * returns ib qp state corresponding to given ehca qp state + */ +static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state + ehca_qp_state) +{ + switch (ehca_qp_state) { + case EHCA_QPS_RESET: + return IB_QPS_RESET; + case EHCA_QPS_INIT: + return IB_QPS_INIT; + case EHCA_QPS_RTR: + return IB_QPS_RTR; + case EHCA_QPS_RTS: + return IB_QPS_RTS; + case EHCA_QPS_SQD: + return IB_QPS_SQD; + case EHCA_QPS_SQE: + return IB_QPS_SQE; + case EHCA_QPS_ERR: + return IB_QPS_ERR; + default: + ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state); + return -EINVAL; + } +} + +/* + * ehca_qp_type used as index for req_attr and opt_attr of + * struct ehca_modqp_statetrans + */ +enum ehca_qp_type { + QPT_RC = 0, + QPT_UC = 1, + QPT_UD = 2, + QPT_SQP = 3, + QPT_MAX +}; + +/* + * ib2ehcaqptype maps Ib to ehca qp_type + * returns ehca qp type corresponding to ib qp type + */ +static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype) +{ + switch (ibqptype) { + case IB_QPT_SMI: + case IB_QPT_GSI: + return QPT_SQP; + case IB_QPT_RC: + return QPT_RC; + case IB_QPT_UC: + return QPT_UC; + case IB_QPT_UD: + return QPT_UD; + default: + ehca_gen_err("Invalid ibqptype=%x", ibqptype); + return -EINVAL; + } +} + +static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate, + int ib_tostate) +{ + int index = -EINVAL; + switch (ib_tostate) { + case IB_QPS_RESET: + index = IB_QPST_ANY2RESET; + break; + case IB_QPS_INIT: + switch (ib_fromstate) { + case IB_QPS_RESET: + index = IB_QPST_RESET2INIT; + break; + case IB_QPS_INIT: + index = IB_QPST_INIT2INIT; + break; + } + break; + case IB_QPS_RTR: + if (ib_fromstate == IB_QPS_INIT) + index = IB_QPST_INIT2RTR; + break; + case IB_QPS_RTS: + switch (ib_fromstate) { + case IB_QPS_RTR: + index = IB_QPST_RTR2RTS; + break; + case IB_QPS_RTS: + index = IB_QPST_RTS2RTS; + break; + case IB_QPS_SQD: + index = IB_QPST_SQD2RTS; + break; + case IB_QPS_SQE: + index = IB_QPST_SQE2RTS; + break; + } + break; + case IB_QPS_SQD: + if (ib_fromstate == IB_QPS_RTS) + index = IB_QPST_RTS2SQD; + break; + case IB_QPS_SQE: + break; + case IB_QPS_ERR: + index = IB_QPST_ANY2ERR; + break; + default: + break; + } + return index; +} + +/* + * ibqptype2servicetype returns hcp service type corresponding to given + * ib qp type used by create_qp() + */ +static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) +{ + switch (ibqptype) { + case IB_QPT_SMI: + case IB_QPT_GSI: + return ST_UD; + case IB_QPT_RC: + return ST_RC; + case IB_QPT_UC: + return ST_UC; + case IB_QPT_UD: + return ST_UD; + case IB_QPT_RAW_IPV6: + return -EINVAL; + case IB_QPT_RAW_ETHERTYPE: + return -EINVAL; + default: + ehca_gen_err("Invalid ibqptype=%x", ibqptype); + return -EINVAL; + } +} + +/* + * init userspace queue info from ipz_queue data + */ +static inline void queue2resp(struct ipzu_queue_resp *resp, + struct ipz_queue *queue) +{ + resp->qe_size = queue->qe_size; + resp->act_nr_of_sg = queue->act_nr_of_sg; + resp->queue_length = queue->queue_length; + resp->pagesize = queue->pagesize; + resp->toggle_state = queue->toggle_state; + resp->offset = queue->offset; +} + +/* + * init_qp_queue initializes/constructs r/squeue and registers queue pages. + */ +static inline int init_qp_queue(struct ehca_shca *shca, + struct ehca_pd *pd, + struct ehca_qp *my_qp, + struct ipz_queue *queue, + int q_type, + u64 expected_hret, + struct ehca_alloc_queue_parms *parms, + int wqe_size) +{ + int ret, cnt, ipz_rc, nr_q_pages; + void *vpage; + u64 rpage, h_ret; + struct ib_device *ib_dev = &shca->ib_device; + struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle; + + if (!parms->queue_size) + return 0; + + if (parms->is_small) { + nr_q_pages = 1; + ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, + 128 << parms->page_size, + wqe_size, parms->act_nr_sges, 1); + } else { + nr_q_pages = parms->queue_size; + ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, + EHCA_PAGESIZE, wqe_size, + parms->act_nr_sges, 0); + } + + if (!ipz_rc) { + ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i", + ipz_rc); + return -EBUSY; + } + + /* register queue pages */ + for (cnt = 0; cnt < nr_q_pages; cnt++) { + vpage = ipz_qpageit_get_inc(queue); + if (!vpage) { + ehca_err(ib_dev, "ipz_qpageit_get_inc() " + "failed p_vpage= %p", vpage); + ret = -EINVAL; + goto init_qp_queue1; + } + rpage = virt_to_abs(vpage); + + h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, + my_qp->ipz_qp_handle, + NULL, 0, q_type, + rpage, parms->is_small ? 0 : 1, + my_qp->galpas.kernel); + if (cnt == (nr_q_pages - 1)) { /* last page! */ + if (h_ret != expected_hret) { + ehca_err(ib_dev, "hipz_qp_register_rpage() " + "h_ret=%lli", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queue1; + } + vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); + if (vpage) { + ehca_err(ib_dev, "ipz_qpageit_get_inc() " + "should not succeed vpage=%p", vpage); + ret = -EINVAL; + goto init_qp_queue1; + } + } else { + if (h_ret != H_PAGE_REGISTERED) { + ehca_err(ib_dev, "hipz_qp_register_rpage() " + "h_ret=%lli", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queue1; + } + } + } + + ipz_qeit_reset(queue); + + return 0; + +init_qp_queue1: + ipz_queue_dtor(pd, queue); + return ret; +} + +static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp) +{ + if (is_llqp) + return 128 << act_nr_sge; + else + return offsetof(struct ehca_wqe, + u.nud.sg_list[act_nr_sge]); +} + +static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue, + int req_nr_sge, int is_llqp) +{ + u32 wqe_size, q_size; + int act_nr_sge = req_nr_sge; + + if (!is_llqp) + /* round up #SGEs so WQE size is a power of 2 */ + for (act_nr_sge = 4; act_nr_sge <= 252; + act_nr_sge = 4 + 2 * act_nr_sge) + if (act_nr_sge >= req_nr_sge) + break; + + wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp); + q_size = wqe_size * (queue->max_wr + 1); + + if (q_size <= 512) + queue->page_size = 2; + else if (q_size <= 1024) + queue->page_size = 3; + else + queue->page_size = 0; + + queue->is_small = (queue->page_size != 0); +} + +/* needs to be called with cq->spinlock held */ +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq) +{ + struct list_head *list, *node; + + /* TODO: support low latency QPs */ + if (qp->ext_type == EQPT_LLQP) + return; + + if (on_sq) { + list = &qp->send_cq->sqp_err_list; + node = &qp->sq_err_node; + } else { + list = &qp->recv_cq->rqp_err_list; + node = &qp->rq_err_node; + } + + if (list_empty(node)) + list_add_tail(node, list); + + return; +} + +static void del_from_err_list(struct ehca_cq *cq, struct list_head *node) +{ + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + + if (!list_empty(node)) + list_del_init(node); + + spin_unlock_irqrestore(&cq->spinlock, flags); +} + +static void reset_queue_map(struct ehca_queue_map *qmap) +{ + int i; + + qmap->tail = qmap->entries - 1; + qmap->left_to_poll = 0; + qmap->next_wqe_idx = 0; + for (i = 0; i < qmap->entries; i++) { + qmap->map[i].reported = 1; + qmap->map[i].cqe_req = 0; + } +} + +/* + * Create an ib_qp struct that is either a QP or an SRQ, depending on + * the value of the is_srq parameter. If init_attr and srq_init_attr share + * fields, the field out of init_attr is used. + */ +static struct ehca_qp *internal_create_qp( + struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata, int is_srq) +{ + struct ehca_qp *my_qp, *my_srq = NULL; + struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + struct ib_ucontext *context = NULL; + u64 h_ret; + int is_llqp = 0, has_srq = 0, is_user = 0; + int qp_type, max_send_sge, max_recv_sge, ret; + + /* h_call's out parameters */ + struct ehca_alloc_qp_parms parms; + u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; + unsigned long flags; + + if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) { + ehca_err(pd->device, "Unable to create QP, max number of %i " + "QPs reached.", shca->max_num_qps); + ehca_err(pd->device, "To increase the maximum number of QPs " + "use the number_of_qps module parameter.\n"); + return ERR_PTR(-ENOSPC); + } + + if (init_attr->create_flags) { + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + + memset(&parms, 0, sizeof(parms)); + qp_type = init_attr->qp_type; + + if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR && + init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { + ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", + init_attr->sq_sig_type); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + + /* save LLQP info */ + if (qp_type & 0x80) { + is_llqp = 1; + parms.ext_type = EQPT_LLQP; + parms.ll_comp_flags = qp_type & LLQP_COMP_MASK; + } + qp_type &= 0x1F; + init_attr->qp_type &= 0x1F; + + /* handle SRQ base QPs */ + if (init_attr->srq) { + my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq); + + if (qp_type == IB_QPT_UC) { + ehca_err(pd->device, "UC with SRQ not supported"); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + + has_srq = 1; + parms.ext_type = EQPT_SRQBASE; + parms.srq_qpn = my_srq->real_qp_num; + } + + if (is_llqp && has_srq) { + ehca_err(pd->device, "LLQPs can't have an SRQ"); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + + /* handle SRQs */ + if (is_srq) { + parms.ext_type = EQPT_SRQ; + parms.srq_limit = srq_init_attr->attr.srq_limit; + if (init_attr->cap.max_recv_sge > 3) { + ehca_err(pd->device, "no more than three SGEs " + "supported for SRQ pd=%p max_sge=%x", + pd, init_attr->cap.max_recv_sge); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + } + + /* check QP type */ + if (qp_type != IB_QPT_UD && + qp_type != IB_QPT_UC && + qp_type != IB_QPT_RC && + qp_type != IB_QPT_SMI && + qp_type != IB_QPT_GSI) { + ehca_err(pd->device, "wrong QP Type=%x", qp_type); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + + if (is_llqp) { + switch (qp_type) { + case IB_QPT_RC: + if ((init_attr->cap.max_send_wr > 255) || + (init_attr->cap.max_recv_wr > 255)) { + ehca_err(pd->device, + "Invalid Number of max_sq_wr=%x " + "or max_rq_wr=%x for RC LLQP", + init_attr->cap.max_send_wr, + init_attr->cap.max_recv_wr); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + break; + case IB_QPT_UD: + if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { + ehca_err(pd->device, "UD LLQP not supported " + "by this adapter"); + atomic_dec(&shca->num_qps); + return ERR_PTR(-ENOSYS); + } + if (!(init_attr->cap.max_send_sge <= 5 + && init_attr->cap.max_send_sge >= 1 + && init_attr->cap.max_recv_sge <= 5 + && init_attr->cap.max_recv_sge >= 1)) { + ehca_err(pd->device, + "Invalid Number of max_send_sge=%x " + "or max_recv_sge=%x for UD LLQP", + init_attr->cap.max_send_sge, + init_attr->cap.max_recv_sge); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } else if (init_attr->cap.max_send_wr > 255) { + ehca_err(pd->device, + "Invalid Number of " + "max_send_wr=%x for UD QP_TYPE=%x", + init_attr->cap.max_send_wr, qp_type); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + break; + default: + ehca_err(pd->device, "unsupported LL QP Type=%x", + qp_type); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + } else { + int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI + || qp_type == IB_QPT_GSI) ? 250 : 252; + + if (init_attr->cap.max_send_sge > max_sge + || init_attr->cap.max_recv_sge > max_sge) { + ehca_err(pd->device, "Invalid number of SGEs requested " + "send_sge=%x recv_sge=%x max_sge=%x", + init_attr->cap.max_send_sge, + init_attr->cap.max_recv_sge, max_sge); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + } + + my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); + if (!my_qp) { + ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); + atomic_dec(&shca->num_qps); + return ERR_PTR(-ENOMEM); + } + + if (pd->uobject && udata) { + is_user = 1; + context = pd->uobject->context; + } + + atomic_set(&my_qp->nr_events, 0); + init_waitqueue_head(&my_qp->wait_completion); + spin_lock_init(&my_qp->spinlock_s); + spin_lock_init(&my_qp->spinlock_r); + my_qp->qp_type = qp_type; + my_qp->ext_type = parms.ext_type; + my_qp->state = IB_QPS_RESET; + + if (init_attr->recv_cq) + my_qp->recv_cq = + container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); + if (init_attr->send_cq) + my_qp->send_cq = + container_of(init_attr->send_cq, struct ehca_cq, ib_cq); + + do { + if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) { + ret = -ENOMEM; + ehca_err(pd->device, "Can't reserve idr resources."); + goto create_qp_exit0; + } + + write_lock_irqsave(&ehca_qp_idr_lock, flags); + ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + } while (ret == -EAGAIN); + + if (ret) { + ret = -ENOMEM; + ehca_err(pd->device, "Can't allocate new idr entry."); + goto create_qp_exit0; + } + + if (my_qp->token > 0x1FFFFFF) { + ret = -EINVAL; + ehca_err(pd->device, "Invalid number of qp"); + goto create_qp_exit1; + } + + if (has_srq) + parms.srq_token = my_qp->token; + + parms.servicetype = ibqptype2servicetype(qp_type); + if (parms.servicetype < 0) { + ret = -EINVAL; + ehca_err(pd->device, "Invalid qp_type=%x", qp_type); + goto create_qp_exit1; + } + + /* Always signal by WQE so we can hide circ. WQEs */ + parms.sigtype = HCALL_SIGT_BY_WQE; + + /* UD_AV CIRCUMVENTION */ + max_send_sge = init_attr->cap.max_send_sge; + max_recv_sge = init_attr->cap.max_recv_sge; + if (parms.servicetype == ST_UD && !is_llqp) { + max_send_sge += 2; + max_recv_sge += 2; + } + + parms.token = my_qp->token; + parms.eq_handle = shca->eq.ipz_eq_handle; + parms.pd = my_pd->fw_pd; + if (my_qp->send_cq) + parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle; + if (my_qp->recv_cq) + parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle; + + parms.squeue.max_wr = init_attr->cap.max_send_wr; + parms.rqueue.max_wr = init_attr->cap.max_recv_wr; + parms.squeue.max_sge = max_send_sge; + parms.rqueue.max_sge = max_recv_sge; + + /* RC QPs need one more SWQE for unsolicited ack circumvention */ + if (qp_type == IB_QPT_RC) + parms.squeue.max_wr++; + + if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) { + if (HAS_SQ(my_qp)) + ehca_determine_small_queue( + &parms.squeue, max_send_sge, is_llqp); + if (HAS_RQ(my_qp)) + ehca_determine_small_queue( + &parms.rqueue, max_recv_sge, is_llqp); + parms.qp_storage = + (parms.squeue.is_small || parms.rqueue.is_small); + } + + h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user); + if (h_ret != H_SUCCESS) { + ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli", + h_ret); + ret = ehca2ib_return_code(h_ret); + goto create_qp_exit1; + } + + ib_qp_num = my_qp->real_qp_num = parms.real_qp_num; + my_qp->ipz_qp_handle = parms.qp_handle; + my_qp->galpas = parms.galpas; + + swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp); + rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp); + + switch (qp_type) { + case IB_QPT_RC: + if (is_llqp) { + parms.squeue.act_nr_sges = 1; + parms.rqueue.act_nr_sges = 1; + } + /* hide the extra WQE */ + parms.squeue.act_nr_wqes--; + break; + case IB_QPT_UD: + case IB_QPT_GSI: + case IB_QPT_SMI: + /* UD circumvention */ + if (is_llqp) { + parms.squeue.act_nr_sges = 1; + parms.rqueue.act_nr_sges = 1; + } else { + parms.squeue.act_nr_sges -= 2; + parms.rqueue.act_nr_sges -= 2; + } + + if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { + parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr; + parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr; + parms.squeue.act_nr_sges = init_attr->cap.max_send_sge; + parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge; + ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1; + } + + break; + + default: + break; + } + + /* initialize r/squeue and register queue pages */ + if (HAS_SQ(my_qp)) { + ret = init_qp_queue( + shca, my_pd, my_qp, &my_qp->ipz_squeue, 0, + HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS, + &parms.squeue, swqe_size); + if (ret) { + ehca_err(pd->device, "Couldn't initialize squeue " + "and pages ret=%i", ret); + goto create_qp_exit2; + } + + if (!is_user) { + my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / + my_qp->ipz_squeue.qe_size; + my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->sq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit3; + } + INIT_LIST_HEAD(&my_qp->sq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->sq_map); + } + } + + if (HAS_RQ(my_qp)) { + ret = init_qp_queue( + shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1, + H_SUCCESS, &parms.rqueue, rwqe_size); + if (ret) { + ehca_err(pd->device, "Couldn't initialize rqueue " + "and pages ret=%i", ret); + goto create_qp_exit4; + } + if (!is_user) { + my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / + my_qp->ipz_rqueue.qe_size; + my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->rq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit5; + } + INIT_LIST_HEAD(&my_qp->rq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->rq_map); + } + } else if (init_attr->srq && !is_user) { + /* this is a base QP, use the queue map of the SRQ */ + my_qp->rq_map = my_srq->rq_map; + INIT_LIST_HEAD(&my_qp->rq_err_node); + + my_qp->ipz_rqueue = my_srq->ipz_rqueue; + } + + if (is_srq) { + my_qp->ib_srq.pd = &my_pd->ib_pd; + my_qp->ib_srq.device = my_pd->ib_pd.device; + + my_qp->ib_srq.srq_context = init_attr->qp_context; + my_qp->ib_srq.event_handler = init_attr->event_handler; + } else { + my_qp->ib_qp.qp_num = ib_qp_num; + my_qp->ib_qp.pd = &my_pd->ib_pd; + my_qp->ib_qp.device = my_pd->ib_pd.device; + + my_qp->ib_qp.recv_cq = init_attr->recv_cq; + my_qp->ib_qp.send_cq = init_attr->send_cq; + + my_qp->ib_qp.qp_type = qp_type; + my_qp->ib_qp.srq = init_attr->srq; + + my_qp->ib_qp.qp_context = init_attr->qp_context; + my_qp->ib_qp.event_handler = init_attr->event_handler; + } + + init_attr->cap.max_inline_data = 0; /* not supported yet */ + init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges; + init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes; + init_attr->cap.max_send_sge = parms.squeue.act_nr_sges; + init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes; + my_qp->init_attr = *init_attr; + + if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { + shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = + &my_qp->ib_qp; + if (ehca_nr_ports < 0) { + /* alloc array to cache subsequent modify qp parms + * for autodetect mode + */ + my_qp->mod_qp_parm = + kzalloc(EHCA_MOD_QP_PARM_MAX * + sizeof(*my_qp->mod_qp_parm), + GFP_KERNEL); + if (!my_qp->mod_qp_parm) { + ehca_err(pd->device, + "Could not alloc mod_qp_parm"); + goto create_qp_exit5; + } + } + } + + /* NOTE: define_apq0() not supported yet */ + if (qp_type == IB_QPT_GSI) { + h_ret = ehca_define_sqp(shca, my_qp, init_attr); + if (h_ret != H_SUCCESS) { + kfree(my_qp->mod_qp_parm); + my_qp->mod_qp_parm = NULL; + /* the QP pointer is no longer valid */ + shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = + NULL; + ret = ehca2ib_return_code(h_ret); + goto create_qp_exit6; + } + } + + if (my_qp->send_cq) { + ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); + if (ret) { + ehca_err(pd->device, + "Couldn't assign qp to send_cq ret=%i", ret); + goto create_qp_exit7; + } + } + + /* copy queues, galpa data to user space */ + if (context && udata) { + struct ehca_create_qp_resp resp; + memset(&resp, 0, sizeof(resp)); + + resp.qp_num = my_qp->real_qp_num; + resp.token = my_qp->token; + resp.qp_type = my_qp->qp_type; + resp.ext_type = my_qp->ext_type; + resp.qkey = my_qp->qkey; + resp.real_qp_num = my_qp->real_qp_num; + + if (HAS_SQ(my_qp)) + queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue); + if (HAS_RQ(my_qp)) + queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue); + resp.fw_handle_ofs = (u32) + (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1)); + + if (ib_copy_to_udata(udata, &resp, sizeof resp)) { + ehca_err(pd->device, "Copy to udata failed"); + ret = -EINVAL; + goto create_qp_exit8; + } + } + + return my_qp; + +create_qp_exit8: + ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); + +create_qp_exit7: + kfree(my_qp->mod_qp_parm); + +create_qp_exit6: + if (HAS_RQ(my_qp) && !is_user) + vfree(my_qp->rq_map.map); + +create_qp_exit5: + if (HAS_RQ(my_qp)) + ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); + +create_qp_exit4: + if (HAS_SQ(my_qp) && !is_user) + vfree(my_qp->sq_map.map); + +create_qp_exit3: + if (HAS_SQ(my_qp)) + ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); + +create_qp_exit2: + hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); + +create_qp_exit1: + write_lock_irqsave(&ehca_qp_idr_lock, flags); + idr_remove(&ehca_qp_idr, my_qp->token); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + +create_qp_exit0: + kmem_cache_free(qp_cache, my_qp); + atomic_dec(&shca->num_qps); + return ERR_PTR(ret); +} + +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata) +{ + struct ehca_qp *ret; + + ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0); + return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp; +} + +static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, + struct ib_uobject *uobject); + +struct ib_srq *ehca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) +{ + struct ib_qp_init_attr qp_init_attr; + struct ehca_qp *my_qp; + struct ib_srq *ret; + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + struct hcp_modify_qp_control_block *mqpcb; + u64 hret, update_mask; + + if (srq_init_attr->srq_type != IB_SRQT_BASIC) + return ERR_PTR(-ENOSYS); + + /* For common attributes, internal_create_qp() takes its info + * out of qp_init_attr, so copy all common attrs there. + */ + memset(&qp_init_attr, 0, sizeof(qp_init_attr)); + qp_init_attr.event_handler = srq_init_attr->event_handler; + qp_init_attr.qp_context = srq_init_attr->srq_context; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.qp_type = IB_QPT_RC; + qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr; + qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge; + + my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1); + if (IS_ERR(my_qp)) + return (struct ib_srq *)my_qp; + + /* copy back return values */ + srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; + srq_init_attr->attr.max_sge = 3; + + /* drive SRQ into RTR state */ + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!mqpcb) { + ehca_err(pd->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); + ret = ERR_PTR(-ENOMEM); + goto create_srq1; + } + + mqpcb->qp_state = EHCA_QPS_INIT; + mqpcb->prim_phys_port = 1; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not modify SRQ to INIT " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + mqpcb->qp_enable = 1; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not enable SRQ " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + mqpcb->qp_state = EHCA_QPS_RTR; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not modify SRQ to RTR " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + ehca_free_fw_ctrlblock(mqpcb); + + return &my_qp->ib_srq; + +create_srq2: + ret = ERR_PTR(ehca2ib_return_code(hret)); + ehca_free_fw_ctrlblock(mqpcb); + +create_srq1: + internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject); + + return ret; +} + +/* + * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts + * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe + * returns total number of bad wqes in bad_wqe_cnt + */ +static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, + int *bad_wqe_cnt) +{ + u64 h_ret; + struct ipz_queue *squeue; + void *bad_send_wqe_p, *bad_send_wqe_v; + u64 q_ofs; + struct ehca_wqe *wqe; + int qp_num = my_qp->ib_qp.qp_num; + + /* get send wqe pointer */ + h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, &my_qp->pf, + &bad_send_wqe_p, NULL, 2); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed" + " ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, qp_num, h_ret); + return ehca2ib_return_code(h_ret); + } + bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63))); + ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", + qp_num, bad_send_wqe_p); + /* convert wqe pointer to vadr */ + bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p); + if (ehca_debug_level >= 2) + ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); + squeue = &my_qp->ipz_squeue; + if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) { + ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x" + " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p); + return -EFAULT; + } + + /* loop sets wqe's purge bit */ + wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); + *bad_wqe_cnt = 0; + while (wqe->optype != 0xff && wqe->wqef != 0xff) { + if (ehca_debug_level >= 2) + ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num); + wqe->nr_of_data_seg = 0; /* suppress data access */ + wqe->wqef = WQEF_PURGE; /* WQE to be purged */ + q_ofs = ipz_queue_advance_offset(squeue, q_ofs); + wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); + *bad_wqe_cnt = (*bad_wqe_cnt)+1; + } + /* + * bad wqe will be reprocessed and ignored when pol_cq() is called, + * i.e. nr of wqes with flush error status is one less + */ + ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x", + qp_num, (*bad_wqe_cnt)-1); + wqe->wqef = 0; + + return 0; +} + +static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue, + struct ehca_queue_map *qmap) +{ + void *wqe_v; + u64 q_ofs; + u32 wqe_idx; + unsigned int tail_idx; + + /* convert real to abs address */ + wqe_p = wqe_p & (~(1UL << 63)); + + wqe_v = abs_to_virt(wqe_p); + + if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) { + ehca_gen_err("Invalid offset for calculating left cqes " + "wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v); + return -EFAULT; + } + + tail_idx = next_index(qmap->tail, qmap->entries); + wqe_idx = q_ofs / ipz_queue->qe_size; + + /* check all processed wqes, whether a cqe is requested or not */ + while (tail_idx != wqe_idx) { + if (qmap->map[tail_idx].cqe_req) + qmap->left_to_poll++; + tail_idx = next_index(tail_idx, qmap->entries); + } + /* save index in queue, where we have to start flushing */ + qmap->next_wqe_idx = wqe_idx; + return 0; +} + +static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca) +{ + u64 h_ret; + void *send_wqe_p, *recv_wqe_p; + int ret; + unsigned long flags; + int qp_num = my_qp->ib_qp.qp_num; + + /* this hcall is not supported on base QPs */ + if (my_qp->ext_type != EQPT_SRQBASE) { + /* get send and receive wqe pointer */ + h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, &my_qp->pf, + &send_wqe_p, &recv_wqe_p, 4); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "disable_and_get_wqe() " + "failed ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, qp_num, h_ret); + return ehca2ib_return_code(h_ret); + } + + /* + * acquire lock to ensure that nobody is polling the cq which + * could mean that the qmap->tail pointer is in an + * inconsistent state. + */ + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue, + &my_qp->sq_map); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + if (ret) + return ret; + + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue, + &my_qp->rq_map); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + if (ret) + return ret; + } else { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + my_qp->sq_map.left_to_poll = 0; + my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, + my_qp->sq_map.entries); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + my_qp->rq_map.left_to_poll = 0; + my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, + my_qp->rq_map.entries); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + } + + /* this assures flush cqes being generated only for pending wqes */ + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 1); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + if (HAS_RQ(my_qp)) { + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 0); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, + flags); + } + } + + return 0; +} + +/* + * internal_modify_qp with circumvention to handle aqp0 properly + * smi_reset2init indicates if this is an internal reset-to-init-call for + * smi. This flag must always be zero if called from ehca_modify_qp()! + * This internal func was intorduced to avoid recursion of ehca_modify_qp()! + */ +static int internal_modify_qp(struct ib_qp *ibqp, + struct ib_qp_attr *attr, + int attr_mask, int smi_reset2init) +{ + enum ib_qp_state qp_cur_state, qp_new_state; + int cnt, qp_attr_idx, ret = 0; + enum ib_qp_statetrans statetrans; + struct hcp_modify_qp_control_block *mqpcb; + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = + container_of(ibqp->pd->device, struct ehca_shca, ib_device); + u64 update_mask; + u64 h_ret; + int bad_wqe_cnt = 0; + int is_user = 0; + int squeue_locked = 0; + unsigned long flags = 0; + + /* do query_qp to obtain current attr values */ + mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); + if (!mqpcb) { + ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + mqpcb, my_qp->galpas.kernel); + if (h_ret != H_SUCCESS) { + ehca_err(ibqp->device, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, ibqp->qp_num, h_ret); + ret = ehca2ib_return_code(h_ret); + goto modify_qp_exit1; + } + if (ibqp->uobject) + is_user = 1; + + qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state); + + if (qp_cur_state == -EINVAL) { /* invalid qp state */ + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x " + "ehca_qp=%p qp_num=%x", + mqpcb->qp_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + /* + * circumvention to set aqp0 initial state to init + * as expected by IB spec + */ + if (smi_reset2init == 0 && + ibqp->qp_type == IB_QPT_SMI && + qp_cur_state == IB_QPS_RESET && + (attr_mask & IB_QP_STATE) && + attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */ + struct ib_qp_attr smiqp_attr = { + .qp_state = IB_QPS_INIT, + .port_num = my_qp->init_attr.port_num, + .pkey_index = 0, + .qkey = 0 + }; + int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT | + IB_QP_PKEY_INDEX | IB_QP_QKEY; + int smirc = internal_modify_qp( + ibqp, &smiqp_attr, smiqp_attr_mask, 1); + if (smirc) { + ehca_err(ibqp->device, "SMI RESET -> INIT failed. " + "ehca_modify_qp() rc=%i", smirc); + ret = H_PARAMETER; + goto modify_qp_exit1; + } + qp_cur_state = IB_QPS_INIT; + ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded"); + } + /* is transmitted current state equal to "real" current state */ + if ((attr_mask & IB_QP_CUR_STATE) && + qp_cur_state != attr->cur_qp_state) { + ret = -EINVAL; + ehca_err(ibqp->device, + "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>" + " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x", + attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x " + "new qp_state=%x attribute_mask=%x", + my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); + + qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state; + if (!smi_reset2init && + !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type, + attr_mask)) { + ret = -EINVAL; + ehca_err(ibqp->device, + "Invalid qp transition new_state=%x cur_state=%x " + "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state, + qp_cur_state, my_qp, ibqp->qp_num, attr_mask); + goto modify_qp_exit1; + } + + mqpcb->qp_state = ib2ehca_qp_state(qp_new_state); + if (mqpcb->qp_state) + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + else { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid new qp state=%x " + "ehca_qp=%p qp_num=%x", + qp_new_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + /* retrieve state transition struct to get req and opt attrs */ + statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state); + if (statetrans < 0) { + ret = -EINVAL; + ehca_err(ibqp->device, " qp_cur_state=%x " + "new_qp_state=%x State_xsition=%x ehca_qp=%p " + "qp_num=%x", qp_cur_state, qp_new_state, + statetrans, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + qp_attr_idx = ib2ehcaqptype(ibqp->qp_type); + + if (qp_attr_idx < 0) { + ret = qp_attr_idx; + ehca_err(ibqp->device, + "Invalid QP type=%x ehca_qp=%p qp_num=%x", + ibqp->qp_type, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + ehca_dbg(ibqp->device, + "ehca_qp=%p qp_num=%x qp_state_xsit=%x", + my_qp, ibqp->qp_num, statetrans); + + /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set + * in non-LL UD QPs. + */ + if ((my_qp->qp_type == IB_QPT_UD) && + (my_qp->ext_type != EQPT_LLQP) && + (statetrans == IB_QPST_INIT2RTR) && + (shca->hw_level >= 0x22)) { + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); + mqpcb->send_grh_flag = 1; + } + + /* sqe -> rts: set purge bit of bad wqe before actual trans */ + if ((my_qp->qp_type == IB_QPT_UD || + my_qp->qp_type == IB_QPT_GSI || + my_qp->qp_type == IB_QPT_SMI) && + statetrans == IB_QPST_SQE2RTS) { + /* mark next free wqe if kernel */ + if (!ibqp->uobject) { + struct ehca_wqe *wqe; + /* lock send queue */ + spin_lock_irqsave(&my_qp->spinlock_s, flags); + squeue_locked = 1; + /* mark next free wqe */ + wqe = (struct ehca_wqe *) + ipz_qeit_get(&my_qp->ipz_squeue); + wqe->optype = wqe->wqef = 0xff; + ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p", + ibqp->qp_num, wqe); + } + ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt); + if (ret) { + ehca_err(ibqp->device, "prepare_sqe_rts() failed " + "ehca_qp=%p qp_num=%x ret=%i", + my_qp, ibqp->qp_num, ret); + goto modify_qp_exit2; + } + } + + /* + * enable RDMA_Atomic_Control if reset->init und reliable con + * this is necessary since gen2 does not provide that flag, + * but pHyp requires it + */ + if (statetrans == IB_QPST_RESET2INIT && + (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) { + mqpcb->rdma_atomic_ctrl = 3; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1); + } + /* circ. pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */ + if (statetrans == IB_QPST_INIT2RTR && + (ibqp->qp_type == IB_QPT_UC) && + !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) { + mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */ + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + if (attr->pkey_index >= 16) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid pkey_index=%x. " + "ehca_qp=%p qp_num=%x max_pkey_index=f", + attr->pkey_index, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + mqpcb->prim_p_key_idx = attr->pkey_index; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1); + } + if (attr_mask & IB_QP_PORT) { + struct ehca_sport *sport; + struct ehca_qp *aqp1; + if (attr->port_num < 1 || attr->port_num > shca->num_ports) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid port=%x. " + "ehca_qp=%p qp_num=%x num_ports=%x", + attr->port_num, my_qp, ibqp->qp_num, + shca->num_ports); + goto modify_qp_exit2; + } + sport = &shca->sport[attr->port_num - 1]; + if (!sport->ibqp_sqp[IB_QPT_GSI]) { + /* should not occur */ + ret = -EFAULT; + ehca_err(ibqp->device, "AQP1 was not created for " + "port=%x", attr->port_num); + goto modify_qp_exit2; + } + aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI], + struct ehca_qp, ib_qp); + if (ibqp->qp_type != IB_QPT_GSI && + ibqp->qp_type != IB_QPT_SMI && + aqp1->mod_qp_parm) { + /* + * firmware will reject this modify_qp() because + * port is not activated/initialized fully + */ + ret = -EFAULT; + ehca_warn(ibqp->device, "Couldn't modify qp port=%x: " + "either port is being activated (try again) " + "or cabling issue", attr->port_num); + goto modify_qp_exit2; + } + mqpcb->prim_phys_port = attr->port_num; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1); + } + if (attr_mask & IB_QP_QKEY) { + mqpcb->qkey = attr->qkey; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); + } + if (attr_mask & IB_QP_AV) { + mqpcb->dlid = attr->ah_attr.dlid; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); + mqpcb->source_path_bits = attr->ah_attr.src_path_bits; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1); + mqpcb->service_level = attr->ah_attr.sl; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); + + if (ehca_calc_ipd(shca, mqpcb->prim_phys_port, + attr->ah_attr.static_rate, + &mqpcb->max_static_rate)) { + ret = -EINVAL; + goto modify_qp_exit2; + } + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); + + /* + * Always supply the GRH flag, even if it's zero, to give the + * hypervisor a clear "yes" or "no" instead of a "perhaps" + */ + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); + + /* + * only if GRH is TRUE we might consider SOURCE_GID_IDX + * and DEST_GID otherwise phype will return H_ATTR_PARM!!! + */ + if (attr->ah_attr.ah_flags == IB_AH_GRH) { + mqpcb->send_grh_flag = 1; + + mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1); + + for (cnt = 0; cnt < 16; cnt++) + mqpcb->dest_gid.byte[cnt] = + attr->ah_attr.grh.dgid.raw[cnt]; + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1); + mqpcb->flow_label = attr->ah_attr.grh.flow_label; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1); + mqpcb->hop_limit = attr->ah_attr.grh.hop_limit; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1); + mqpcb->traffic_class = attr->ah_attr.grh.traffic_class; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1); + } + } + + if (attr_mask & IB_QP_PATH_MTU) { + /* store ld(MTU) */ + my_qp->mtu_shift = attr->path_mtu + 7; + mqpcb->path_mtu = attr->path_mtu; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1); + } + if (attr_mask & IB_QP_TIMEOUT) { + mqpcb->timeout = attr->timeout; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1); + } + if (attr_mask & IB_QP_RETRY_CNT) { + mqpcb->retry_count = attr->retry_cnt; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1); + } + if (attr_mask & IB_QP_RNR_RETRY) { + mqpcb->rnr_retry_count = attr->rnr_retry; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1); + } + if (attr_mask & IB_QP_RQ_PSN) { + mqpcb->receive_psn = attr->rq_psn; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1); + } + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ? + attr->max_dest_rd_atomic : 2; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); + } + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ? + attr->max_rd_atomic : 2; + update_mask |= + EHCA_BMASK_SET + (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1); + } + if (attr_mask & IB_QP_ALT_PATH) { + if (attr->alt_port_num < 1 + || attr->alt_port_num > shca->num_ports) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid alt_port=%x. " + "ehca_qp=%p qp_num=%x num_ports=%x", + attr->alt_port_num, my_qp, ibqp->qp_num, + shca->num_ports); + goto modify_qp_exit2; + } + mqpcb->alt_phys_port = attr->alt_port_num; + + if (attr->alt_pkey_index >= 16) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. " + "ehca_qp=%p qp_num=%x max_pkey_index=f", + attr->pkey_index, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + mqpcb->alt_p_key_idx = attr->alt_pkey_index; + + mqpcb->timeout_al = attr->alt_timeout; + mqpcb->dlid_al = attr->alt_ah_attr.dlid; + mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; + mqpcb->service_level_al = attr->alt_ah_attr.sl; + + if (ehca_calc_ipd(shca, mqpcb->alt_phys_port, + attr->alt_ah_attr.static_rate, + &mqpcb->max_static_rate_al)) { + ret = -EINVAL; + goto modify_qp_exit2; + } + + /* OpenIB doesn't support alternate retry counts - copy them */ + mqpcb->retry_count_al = mqpcb->retry_count; + mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count; + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1) + | EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1) + | EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1); + + /* + * Always supply the GRH flag, even if it's zero, to give the + * hypervisor a clear "yes" or "no" instead of a "perhaps" + */ + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1); + + /* + * only if GRH is TRUE we might consider SOURCE_GID_IDX + * and DEST_GID otherwise phype will return H_ATTR_PARM!!! + */ + if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) { + mqpcb->send_grh_flag_al = 1; + + for (cnt = 0; cnt < 16; cnt++) + mqpcb->dest_gid_al.byte[cnt] = + attr->alt_ah_attr.grh.dgid.raw[cnt]; + mqpcb->source_gid_idx_al = + attr->alt_ah_attr.grh.sgid_index; + mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label; + mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit; + mqpcb->traffic_class_al = + attr->alt_ah_attr.grh.traffic_class; + + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) | + EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1); + } + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1); + } + + if (attr_mask & IB_QP_SQ_PSN) { + mqpcb->send_psn = attr->sq_psn; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1); + } + + if (attr_mask & IB_QP_DEST_QPN) { + mqpcb->dest_qp_nr = attr->dest_qp_num; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1); + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + if (attr->path_mig_state != IB_MIG_REARM + && attr->path_mig_state != IB_MIG_MIGRATED) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid mig_state=%x", + attr->path_mig_state); + goto modify_qp_exit2; + } + mqpcb->path_migration_state = attr->path_mig_state + 1; + if (attr->path_mig_state == IB_MIG_REARM) + my_qp->mig_armed = 1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); + } + + if (attr_mask & IB_QP_CAP) { + mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1); + mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1); + /* no support for max_send/recv_sge yet */ + } + + if (ehca_debug_level >= 2) + ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli " + "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + + if ((my_qp->qp_type == IB_QPT_UD || + my_qp->qp_type == IB_QPT_GSI || + my_qp->qp_type == IB_QPT_SMI) && + statetrans == IB_QPST_SQE2RTS) { + /* doorbell to reprocessing wqes */ + iosync(); /* serialize GAL register access */ + hipz_update_sqa(my_qp, bad_wqe_cnt-1); + ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt); + } + + if (statetrans == IB_QPST_RESET2INIT || + statetrans == IB_QPST_INIT2INIT) { + mqpcb->qp_enable = 1; + mqpcb->qp_state = EHCA_QPS_INIT; + update_mask = 0; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, + my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibqp->device, "ENABLE in context of " + "RESET_2_INIT failed! Maybe you didn't get " + "a LID h_ret=%lli ehca_qp=%p qp_num=%x", + h_ret, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + } + if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR) + && !is_user) { + ret = check_for_left_cqes(my_qp, shca); + if (ret) + goto modify_qp_exit2; + } + + if (statetrans == IB_QPST_ANY2RESET) { + ipz_qeit_reset(&my_qp->ipz_rqueue); + ipz_qeit_reset(&my_qp->ipz_squeue); + + if (qp_cur_state == IB_QPS_ERR && !is_user) { + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + + if (HAS_RQ(my_qp)) + del_from_err_list(my_qp->recv_cq, + &my_qp->rq_err_node); + } + if (!is_user) + reset_queue_map(&my_qp->sq_map); + + if (HAS_RQ(my_qp) && !is_user) + reset_queue_map(&my_qp->rq_map); + } + + if (attr_mask & IB_QP_QKEY) + my_qp->qkey = attr->qkey; + +modify_qp_exit2: + if (squeue_locked) { /* this means: sqe -> rts */ + spin_unlock_irqrestore(&my_qp->spinlock_s, flags); + my_qp->sqerr_purgeflag = 1; + } + +modify_qp_exit1: + ehca_free_fw_ctrlblock(mqpcb); + + return ret; +} + +int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata) +{ + int ret = 0; + + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + ib_device); + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + + /* The if-block below caches qp_attr to be modified for GSI and SMI + * qps during the initialization by ib_mad. When the respective port + * is activated, ie we got an event PORT_ACTIVE, we'll replay the + * cached modify calls sequence, see ehca_recover_sqs() below. + * Why that is required: + * 1) If one port is connected, older code requires that port one + * to be connected and module option nr_ports=1 to be given by + * user, which is very inconvenient for end user. + * 2) Firmware accepts modify_qp() only if respective port has become + * active. Older code had a wait loop of 30sec create_qp()/ + * define_aqp1(), which is not appropriate in practice. This + * code now removes that wait loop, see define_aqp1(), and always + * reports all ports to ib_mad resp. users. Only activated ports + * will then usable for the users. + */ + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { + int port = my_qp->init_attr.port_num; + struct ehca_sport *sport = &shca->sport[port - 1]; + unsigned long flags; + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + /* cache qp_attr only during init */ + if (my_qp->mod_qp_parm) { + struct ehca_mod_qp_parm *p; + if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) { + ehca_err(&shca->ib_device, + "mod_qp_parm overflow state=%x port=%x" + " type=%x", attr->qp_state, + my_qp->init_attr.port_num, + ibqp->qp_type); + spin_unlock_irqrestore(&sport->mod_sqp_lock, + flags); + return -EINVAL; + } + p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx]; + p->mask = attr_mask; + p->attr = *attr; + my_qp->mod_qp_parm_idx++; + ehca_dbg(&shca->ib_device, + "Saved qp_attr for state=%x port=%x type=%x", + attr->qp_state, my_qp->init_attr.port_num, + ibqp->qp_type); + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + goto out; + } + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + } + + ret = internal_modify_qp(ibqp, attr, attr_mask, 0); + +out: + if ((ret == 0) && (attr_mask & IB_QP_STATE)) + my_qp->state = attr->qp_state; + + return ret; +} + +void ehca_recover_sqp(struct ib_qp *sqp) +{ + struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp); + int port = my_sqp->init_attr.port_num; + struct ib_qp_attr attr; + struct ehca_mod_qp_parm *qp_parm; + int i, qp_parm_idx, ret; + unsigned long flags, wr_cnt; + + if (!my_sqp->mod_qp_parm) + return; + ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num); + + qp_parm = my_sqp->mod_qp_parm; + qp_parm_idx = my_sqp->mod_qp_parm_idx; + for (i = 0; i < qp_parm_idx; i++) { + attr = qp_parm[i].attr; + ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0); + if (ret) { + ehca_err(sqp->device, "Could not modify SQP port=%x " + "qp_num=%x ret=%x", port, sqp->qp_num, ret); + goto free_qp_parm; + } + ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x", + port, sqp->qp_num, attr.qp_state); + } + + /* re-trigger posted recv wrs */ + wr_cnt = my_sqp->ipz_rqueue.current_q_offset / + my_sqp->ipz_rqueue.qe_size; + if (wr_cnt) { + spin_lock_irqsave(&my_sqp->spinlock_r, flags); + hipz_update_rqa(my_sqp, wr_cnt); + spin_unlock_irqrestore(&my_sqp->spinlock_r, flags); + ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx", + port, sqp->qp_num, wr_cnt); + } + +free_qp_parm: + kfree(qp_parm); + /* this prevents subsequent calls to modify_qp() to cache qp_attr */ + my_sqp->mod_qp_parm = NULL; +} + +int ehca_query_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(qp->device, struct ehca_shca, + ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + struct hcp_modify_qp_control_block *qpcb; + int cnt, ret = 0; + u64 h_ret; + + if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { + ehca_err(qp->device, "Invalid attribute mask " + "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", + my_qp, qp->qp_num, qp_attr_mask); + return -EINVAL; + } + + qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!qpcb) { + ehca_err(qp->device, "Out of memory for qpcb " + "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(adapter_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + qpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(qp->device, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, qp->qp_num, h_ret); + goto query_qp_exit1; + } + + qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state); + qp_attr->qp_state = qp_attr->cur_qp_state; + + if (qp_attr->cur_qp_state == -EINVAL) { + ret = -EINVAL; + ehca_err(qp->device, "Got invalid ehca_qp_state=%x " + "ehca_qp=%p qp_num=%x", + qpcb->qp_state, my_qp, qp->qp_num); + goto query_qp_exit1; + } + + if (qp_attr->qp_state == IB_QPS_SQD) + qp_attr->sq_draining = 1; + + qp_attr->qkey = qpcb->qkey; + qp_attr->path_mtu = qpcb->path_mtu; + qp_attr->path_mig_state = qpcb->path_migration_state - 1; + qp_attr->rq_psn = qpcb->receive_psn; + qp_attr->sq_psn = qpcb->send_psn; + qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field; + qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1; + qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1; + /* UD_AV CIRCUMVENTION */ + if (my_qp->qp_type == IB_QPT_UD) { + qp_attr->cap.max_send_sge = + qpcb->actual_nr_sges_in_sq_wqe - 2; + qp_attr->cap.max_recv_sge = + qpcb->actual_nr_sges_in_rq_wqe - 2; + } else { + qp_attr->cap.max_send_sge = + qpcb->actual_nr_sges_in_sq_wqe; + qp_attr->cap.max_recv_sge = + qpcb->actual_nr_sges_in_rq_wqe; + } + + qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; + qp_attr->dest_qp_num = qpcb->dest_qp_nr; + + qp_attr->pkey_index = qpcb->prim_p_key_idx; + qp_attr->port_num = qpcb->prim_phys_port; + qp_attr->timeout = qpcb->timeout; + qp_attr->retry_cnt = qpcb->retry_count; + qp_attr->rnr_retry = qpcb->rnr_retry_count; + + qp_attr->alt_pkey_index = qpcb->alt_p_key_idx; + qp_attr->alt_port_num = qpcb->alt_phys_port; + qp_attr->alt_timeout = qpcb->timeout_al; + + qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res; + qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp; + + /* primary av */ + qp_attr->ah_attr.sl = qpcb->service_level; + + if (qpcb->send_grh_flag) { + qp_attr->ah_attr.ah_flags = IB_AH_GRH; + } + + qp_attr->ah_attr.static_rate = qpcb->max_static_rate; + qp_attr->ah_attr.dlid = qpcb->dlid; + qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits; + qp_attr->ah_attr.port_num = qp_attr->port_num; + + /* primary GRH */ + qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class; + qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit; + qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx; + qp_attr->ah_attr.grh.flow_label = qpcb->flow_label; + + for (cnt = 0; cnt < 16; cnt++) + qp_attr->ah_attr.grh.dgid.raw[cnt] = + qpcb->dest_gid.byte[cnt]; + + /* alternate AV */ + qp_attr->alt_ah_attr.sl = qpcb->service_level_al; + if (qpcb->send_grh_flag_al) { + qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH; + } + + qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al; + qp_attr->alt_ah_attr.dlid = qpcb->dlid_al; + qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al; + + /* alternate GRH */ + qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al; + qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al; + qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al; + qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al; + + for (cnt = 0; cnt < 16; cnt++) + qp_attr->alt_ah_attr.grh.dgid.raw[cnt] = + qpcb->dest_gid_al.byte[cnt]; + + /* return init attributes given in ehca_create_qp */ + if (qp_init_attr) + *qp_init_attr = my_qp->init_attr; + + if (ehca_debug_level >= 2) + ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num); + +query_qp_exit1: + ehca_free_fw_ctrlblock(qpcb); + + return ret; +} + +int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct ehca_qp *my_qp = + container_of(ibsrq, struct ehca_qp, ib_srq); + struct ehca_shca *shca = + container_of(ibsrq->pd->device, struct ehca_shca, ib_device); + struct hcp_modify_qp_control_block *mqpcb; + u64 update_mask; + u64 h_ret; + int ret = 0; + + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!mqpcb) { + ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); + return -ENOMEM; + } + + update_mask = 0; + if (attr_mask & IB_SRQ_LIMIT) { + attr_mask &= ~IB_SRQ_LIMIT; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) + | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); + mqpcb->curr_srq_limit = attr->srq_limit; + mqpcb->qp_aff_asyn_ev_log_reg = + EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); + } + + /* by now, all bits in attr_mask should have been cleared */ + if (attr_mask) { + ehca_err(ibsrq->device, "invalid attribute mask bits set " + "attr_mask=%x", attr_mask); + ret = -EINVAL; + goto modify_srq_exit0; + } + + if (ehca_debug_level >= 2) + ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle, + NULL, update_mask, mqpcb, + my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli " + "ehca_qp=%p qp_num=%x", + h_ret, my_qp, my_qp->real_qp_num); + } + +modify_srq_exit0: + ehca_free_fw_ctrlblock(mqpcb); + + return ret; +} + +int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) +{ + struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq); + struct ehca_shca *shca = container_of(srq->device, struct ehca_shca, + ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + struct hcp_modify_qp_control_block *qpcb; + int ret = 0; + u64 h_ret; + + qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!qpcb) { + ehca_err(srq->device, "Out of memory for qpcb " + "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle, + NULL, qpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(srq->device, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, my_qp->real_qp_num, h_ret); + goto query_srq_exit1; + } + + srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; + srq_attr->max_sge = 3; + srq_attr->srq_limit = qpcb->curr_srq_limit; + + if (ehca_debug_level >= 2) + ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); + +query_srq_exit1: + ehca_free_fw_ctrlblock(qpcb); + + return ret; +} + +static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, + struct ib_uobject *uobject) +{ + struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); + struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, + ib_pd); + struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1]; + u32 qp_num = my_qp->real_qp_num; + int ret; + u64 h_ret; + u8 port_num; + int is_user = 0; + enum ib_qp_type qp_type; + unsigned long flags; + + if (uobject) { + is_user = 1; + if (my_qp->mm_count_galpa || + my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { + ehca_err(dev, "Resources still referenced in " + "user space qp_num=%x", qp_num); + return -EINVAL; + } + } + + if (my_qp->send_cq) { + ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num); + if (ret) { + ehca_err(dev, "Couldn't unassign qp from " + "send_cq ret=%i qp_num=%x cq_num=%x", ret, + qp_num, my_qp->send_cq->cq_number); + return ret; + } + } + + write_lock_irqsave(&ehca_qp_idr_lock, flags); + idr_remove(&ehca_qp_idr, my_qp->token); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + /* + * SRQs will never get into an error list and do not have a recv_cq, + * so we need to skip them here. + */ + if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user) + del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); + + if (HAS_SQ(my_qp) && !is_user) + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + + /* now wait until all pending events have completed */ + wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); + + h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); + if (h_ret != H_SUCCESS) { + ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli " + "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); + return ehca2ib_return_code(h_ret); + } + + port_num = my_qp->init_attr.port_num; + qp_type = my_qp->init_attr.qp_type; + + if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + kfree(my_qp->mod_qp_parm); + my_qp->mod_qp_parm = NULL; + shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL; + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + } + + /* no support for IB_QPT_SMI yet */ + if (qp_type == IB_QPT_GSI) { + struct ib_event event; + ehca_info(dev, "device %s: port %x is inactive.", + shca->ib_device.name, port_num); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ERR; + event.element.port_num = port_num; + shca->sport[port_num - 1].port_state = IB_PORT_DOWN; + ib_dispatch_event(&event); + } + + if (HAS_RQ(my_qp)) { + ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); + if (!is_user) + vfree(my_qp->rq_map.map); + } + if (HAS_SQ(my_qp)) { + ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); + if (!is_user) + vfree(my_qp->sq_map.map); + } + kmem_cache_free(qp_cache, my_qp); + atomic_dec(&shca->num_qps); + return 0; +} + +int ehca_destroy_qp(struct ib_qp *qp) +{ + return internal_destroy_qp(qp->device, + container_of(qp, struct ehca_qp, ib_qp), + qp->uobject); +} + +int ehca_destroy_srq(struct ib_srq *srq) +{ + return internal_destroy_qp(srq->device, + container_of(srq, struct ehca_qp, ib_srq), + srq->uobject); +} + +int ehca_init_qp_cache(void) +{ + qp_cache = kmem_cache_create("ehca_cache_qp", + sizeof(struct ehca_qp), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!qp_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_qp_cache(void) +{ + if (qp_cache) + kmem_cache_destroy(qp_cache); +} diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c new file mode 100644 index 00000000..fd05f48f --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -0,0 +1,953 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * post_send/recv, poll_cq, req_notify + * + * Authors: Hoang-Nam Nguyen + * Waleri Fomin + * Joachim Fenkes + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +/* in RC traffic, insert an empty RDMA READ every this many packets */ +#define ACK_CIRC_THRESHOLD 2000000 + +static u64 replace_wr_id(u64 wr_id, u16 idx) +{ + u64 ret; + + ret = wr_id & ~QMAP_IDX_MASK; + ret |= idx & QMAP_IDX_MASK; + + return ret; +} + +static u16 get_app_wr_id(u64 wr_id) +{ + return wr_id & QMAP_IDX_MASK; +} + +static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, + struct ehca_wqe *wqe_p, + struct ib_recv_wr *recv_wr, + u32 rq_map_idx) +{ + u8 cnt_ds; + if (unlikely((recv_wr->num_sge < 0) || + (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) { + ehca_gen_err("Invalid number of WQE SGE. " + "num_sqe=%x max_nr_of_sg=%x", + recv_wr->num_sge, ipz_rqueue->act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ + } + + /* clear wqe header until sglist */ + memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); + + wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); + wqe_p->nr_of_data_seg = recv_wr->num_sge; + + for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { + wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr = + recv_wr->sg_list[cnt_ds].addr; + wqe_p->u.all_rcv.sg_list[cnt_ds].lkey = + recv_wr->sg_list[cnt_ds].lkey; + wqe_p->u.all_rcv.sg_list[cnt_ds].length = + recv_wr->sg_list[cnt_ds].length; + } + + if (ehca_debug_level >= 3) { + ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", + ipz_rqueue); + ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); + } + + return 0; +} + +#if defined(DEBUG_GSI_SEND_WR) + +/* need ib_mad struct */ +#include + +static void trace_send_wr_ud(const struct ib_send_wr *send_wr) +{ + int idx; + int j; + while (send_wr) { + struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; + struct ib_sge *sge = send_wr->sg_list; + ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " + "send_flags=%x opcode=%x", idx, send_wr->wr_id, + send_wr->num_sge, send_wr->send_flags, + send_wr->opcode); + if (mad_hdr) { + ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x " + "mgmt_class=%x class_version=%x method=%x " + "status=%x class_specific=%x tid=%lx " + "attr_id=%x resv=%x attr_mod=%x", + idx, mad_hdr->base_version, + mad_hdr->mgmt_class, + mad_hdr->class_version, mad_hdr->method, + mad_hdr->status, mad_hdr->class_specific, + mad_hdr->tid, mad_hdr->attr_id, + mad_hdr->resv, + mad_hdr->attr_mod); + } + for (j = 0; j < send_wr->num_sge; j++) { + u8 *data = (u8 *)abs_to_virt(sge->addr); + ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " + "lkey=%x", + idx, j, data, sge->length, sge->lkey); + /* assume length is n*16 */ + ehca_dmp(data, sge->length, "send_wr#%x sge#%x", + idx, j); + sge++; + } /* eof for j */ + idx++; + send_wr = send_wr->next; + } /* eof while send_wr */ +} + +#endif /* DEBUG_GSI_SEND_WR */ + +static inline int ehca_write_swqe(struct ehca_qp *qp, + struct ehca_wqe *wqe_p, + const struct ib_send_wr *send_wr, + u32 sq_map_idx, + int hidden) +{ + u32 idx; + u64 dma_length; + struct ehca_av *my_av; + u32 remote_qkey = send_wr->wr.ud.remote_qkey; + struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; + + if (unlikely((send_wr->num_sge < 0) || + (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { + ehca_gen_err("Invalid number of WQE SGE. " + "num_sqe=%x max_nr_of_sg=%x", + send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ + } + + /* clear wqe header until sglist */ + memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); + + wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); + + qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); + qmap_entry->reported = 0; + qmap_entry->cqe_req = 0; + + switch (send_wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + wqe_p->optype = WQE_OPTYPE_SEND; + break; + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + wqe_p->optype = WQE_OPTYPE_RDMAWRITE; + break; + case IB_WR_RDMA_READ: + wqe_p->optype = WQE_OPTYPE_RDMAREAD; + break; + default: + ehca_gen_err("Invalid opcode=%x", send_wr->opcode); + return -EINVAL; /* invalid opcode */ + } + + wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE; + + wqe_p->wr_flag = 0; + + if ((send_wr->send_flags & IB_SEND_SIGNALED || + qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR) + && !hidden) { + wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; + qmap_entry->cqe_req = 1; + } + + if (send_wr->opcode == IB_WR_SEND_WITH_IMM || + send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { + /* this might not work as long as HW does not support it */ + wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data); + wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT; + } + + wqe_p->nr_of_data_seg = send_wr->num_sge; + + switch (qp->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + /* no break is intential here */ + case IB_QPT_UD: + /* IB 1.2 spec C10-15 compliance */ + if (send_wr->wr.ud.remote_qkey & 0x80000000) + remote_qkey = qp->qkey; + + wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; + wqe_p->local_ee_context_qkey = remote_qkey; + if (unlikely(!send_wr->wr.ud.ah)) { + ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); + return -EINVAL; + } + if (unlikely(send_wr->wr.ud.remote_qpn == 0)) { + ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num); + return -EINVAL; + } + my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); + wqe_p->u.ud_av.ud_av = my_av->av; + + /* + * omitted check of IB_SEND_INLINE + * since HW does not support it + */ + for (idx = 0; idx < send_wr->num_sge; idx++) { + wqe_p->u.ud_av.sg_list[idx].vaddr = + send_wr->sg_list[idx].addr; + wqe_p->u.ud_av.sg_list[idx].lkey = + send_wr->sg_list[idx].lkey; + wqe_p->u.ud_av.sg_list[idx].length = + send_wr->sg_list[idx].length; + } /* eof for idx */ + if (qp->qp_type == IB_QPT_SMI || + qp->qp_type == IB_QPT_GSI) + wqe_p->u.ud_av.ud_av.pmtu = 1; + if (qp->qp_type == IB_QPT_GSI) { + wqe_p->pkeyi = send_wr->wr.ud.pkey_index; +#ifdef DEBUG_GSI_SEND_WR + trace_send_wr_ud(send_wr); +#endif /* DEBUG_GSI_SEND_WR */ + } + break; + + case IB_QPT_UC: + if (send_wr->send_flags & IB_SEND_FENCE) + wqe_p->wr_flag |= WQE_WRFLAG_FENCE; + /* no break is intentional here */ + case IB_QPT_RC: + /* TODO: atomic not implemented */ + wqe_p->u.nud.remote_virtual_address = + send_wr->wr.rdma.remote_addr; + wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; + + /* + * omitted checking of IB_SEND_INLINE + * since HW does not support it + */ + dma_length = 0; + for (idx = 0; idx < send_wr->num_sge; idx++) { + wqe_p->u.nud.sg_list[idx].vaddr = + send_wr->sg_list[idx].addr; + wqe_p->u.nud.sg_list[idx].lkey = + send_wr->sg_list[idx].lkey; + wqe_p->u.nud.sg_list[idx].length = + send_wr->sg_list[idx].length; + dma_length += send_wr->sg_list[idx].length; + } /* eof idx */ + wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; + + /* unsolicited ack circumvention */ + if (send_wr->opcode == IB_WR_RDMA_READ) { + /* on RDMA read, switch on and reset counters */ + qp->message_count = qp->packet_count = 0; + qp->unsol_ack_circ = 1; + } else + /* else estimate #packets */ + qp->packet_count += (dma_length >> qp->mtu_shift) + 1; + + break; + + default: + ehca_gen_err("Invalid qptype=%x", qp->qp_type); + return -EINVAL; + } + + if (ehca_debug_level >= 3) { + ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp); + ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); + } + return 0; +} + +/* map_ib_wc_status converts raw cqe_status to ib_wc_status */ +static inline void map_ib_wc_status(u32 cqe_status, + enum ib_wc_status *wc_status) +{ + if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) { + switch (cqe_status & 0x3F) { + case 0x01: + case 0x21: + *wc_status = IB_WC_LOC_LEN_ERR; + break; + case 0x02: + case 0x22: + *wc_status = IB_WC_LOC_QP_OP_ERR; + break; + case 0x03: + case 0x23: + *wc_status = IB_WC_LOC_EEC_OP_ERR; + break; + case 0x04: + case 0x24: + *wc_status = IB_WC_LOC_PROT_ERR; + break; + case 0x05: + case 0x25: + *wc_status = IB_WC_WR_FLUSH_ERR; + break; + case 0x06: + *wc_status = IB_WC_MW_BIND_ERR; + break; + case 0x07: /* remote error - look into bits 20:24 */ + switch ((cqe_status + & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) { + case 0x0: + /* + * PSN Sequence Error! + * couldn't find a matching status! + */ + *wc_status = IB_WC_GENERAL_ERR; + break; + case 0x1: + *wc_status = IB_WC_REM_INV_REQ_ERR; + break; + case 0x2: + *wc_status = IB_WC_REM_ACCESS_ERR; + break; + case 0x3: + *wc_status = IB_WC_REM_OP_ERR; + break; + case 0x4: + *wc_status = IB_WC_REM_INV_RD_REQ_ERR; + break; + } + break; + case 0x08: + *wc_status = IB_WC_RETRY_EXC_ERR; + break; + case 0x09: + *wc_status = IB_WC_RNR_RETRY_EXC_ERR; + break; + case 0x0A: + case 0x2D: + *wc_status = IB_WC_REM_ABORT_ERR; + break; + case 0x0B: + case 0x2E: + *wc_status = IB_WC_INV_EECN_ERR; + break; + case 0x0C: + case 0x2F: + *wc_status = IB_WC_INV_EEC_STATE_ERR; + break; + case 0x0D: + *wc_status = IB_WC_BAD_RESP_ERR; + break; + case 0x10: + /* WQE purged */ + *wc_status = IB_WC_WR_FLUSH_ERR; + break; + default: + *wc_status = IB_WC_FATAL_ERR; + + } + } else + *wc_status = IB_WC_SUCCESS; +} + +static inline int post_one_send(struct ehca_qp *my_qp, + struct ib_send_wr *cur_send_wr, + int hidden) +{ + struct ehca_wqe *wqe_p; + int ret; + u32 sq_map_idx; + u64 start_offset = my_qp->ipz_squeue.current_q_offset; + + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -ENOMEM; + } + + /* + * Get the index of the WQE in the send queue. The same index is used + * for writing into the sq_map. + */ + sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size; + + /* write a SEND WQE into the QUEUE */ + ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_squeue.current_q_offset = start_offset; + ehca_err(my_qp->ib_qp.device, "Could not write WQE " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -EINVAL; + } + + return 0; +} + +int ehca_post_send(struct ib_qp *qp, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + int wqe_cnt = 0; + int ret = 0; + unsigned long flags; + + /* Reject WR if QP is in RESET, INIT or RTR state */ + if (unlikely(my_qp->state < IB_QPS_RTS)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + ret = -EINVAL; + goto out; + } + + /* LOCK the QUEUE */ + spin_lock_irqsave(&my_qp->spinlock_s, flags); + + /* Send an empty extra RDMA read if: + * 1) there has been an RDMA read on this connection before + * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets + * 3) we can be sure that any previous extra RDMA read has been + * processed so we don't overflow the SQ + */ + if (unlikely(my_qp->unsol_ack_circ && + my_qp->packet_count > ACK_CIRC_THRESHOLD && + my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) { + /* insert an empty RDMA READ to fix up the remote QP state */ + struct ib_send_wr circ_wr; + memset(&circ_wr, 0, sizeof(circ_wr)); + circ_wr.opcode = IB_WR_RDMA_READ; + post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */ + wqe_cnt++; + ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); + my_qp->message_count = my_qp->packet_count = 0; + } + + /* loop processes list of send reqs */ + while (send_wr) { + ret = post_one_send(my_qp, send_wr, 0); + if (unlikely(ret)) { + goto post_send_exit0; + } + wqe_cnt++; + send_wr = send_wr->next; + } + +post_send_exit0: + iosync(); /* serialize GAL register access */ + hipz_update_sqa(my_qp, wqe_cnt); + if (unlikely(ret || ehca_debug_level >= 2)) + ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", + my_qp, qp->qp_num, wqe_cnt, ret); + my_qp->message_count += wqe_cnt; + spin_unlock_irqrestore(&my_qp->spinlock_s, flags); + +out: + if (ret) + *bad_send_wr = send_wr; + return ret; +} + +static int internal_post_recv(struct ehca_qp *my_qp, + struct ib_device *dev, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + struct ehca_wqe *wqe_p; + int wqe_cnt = 0; + int ret = 0; + u32 rq_map_idx; + unsigned long flags; + struct ehca_qmap_entry *qmap_entry; + + if (unlikely(!HAS_RQ(my_qp))) { + ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", + my_qp, my_qp->real_qp_num, my_qp->ext_type); + ret = -ENODEV; + goto out; + } + + /* LOCK the QUEUE */ + spin_lock_irqsave(&my_qp->spinlock_r, flags); + + /* loop processes list of recv reqs */ + while (recv_wr) { + u64 start_offset = my_qp->ipz_rqueue.current_q_offset; + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + ret = -ENOMEM; + ehca_err(dev, "Too many posted WQEs " + "qp_num=%x", my_qp->real_qp_num); + goto post_recv_exit0; + } + /* + * Get the index of the WQE in the recv queue. The same index + * is used for writing into the rq_map. + */ + rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; + + /* write a RECV WQE into the QUEUE */ + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr, + rq_map_idx); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_rqueue.current_q_offset = start_offset; + ret = -EINVAL; + ehca_err(dev, "Could not write WQE " + "qp_num=%x", my_qp->real_qp_num); + goto post_recv_exit0; + } + + qmap_entry = &my_qp->rq_map.map[rq_map_idx]; + qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id); + qmap_entry->reported = 0; + qmap_entry->cqe_req = 1; + + wqe_cnt++; + recv_wr = recv_wr->next; + } /* eof for recv_wr */ + +post_recv_exit0: + iosync(); /* serialize GAL register access */ + hipz_update_rqa(my_qp, wqe_cnt); + if (unlikely(ret || ehca_debug_level >= 2)) + ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", + my_qp, my_qp->real_qp_num, wqe_cnt, ret); + spin_unlock_irqrestore(&my_qp->spinlock_r, flags); + +out: + if (ret) + *bad_recv_wr = recv_wr; + + return ret; +} + +int ehca_post_recv(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + + /* Reject WR if QP is in RESET state */ + if (unlikely(my_qp->state == IB_QPS_RESET)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + *bad_recv_wr = recv_wr; + return -EINVAL; + } + + return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr); +} + +int ehca_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq), + srq->device, recv_wr, bad_recv_wr); +} + +/* + * ib_wc_opcode table converts ehca wc opcode to ib + * Since we use zero to indicate invalid opcode, the actual ib opcode must + * be decremented!!! + */ +static const u8 ib_wc_opcode[255] = { + [0x01] = IB_WC_RECV+1, + [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1, + [0x04] = IB_WC_BIND_MW+1, + [0x08] = IB_WC_FETCH_ADD+1, + [0x10] = IB_WC_COMP_SWAP+1, + [0x20] = IB_WC_RDMA_WRITE+1, + [0x40] = IB_WC_RDMA_READ+1, + [0x80] = IB_WC_SEND+1 +}; + +/* internal function to poll one entry of cq */ +static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) +{ + int ret = 0, qmap_tail_idx; + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + struct ehca_cqe *cqe; + struct ehca_qp *my_qp; + struct ehca_qmap_entry *qmap_entry; + struct ehca_queue_map *qmap; + int cqe_count = 0, is_error; + +repoll: + cqe = (struct ehca_cqe *) + ipz_qeit_get_inc_valid(&my_cq->ipz_queue); + if (!cqe) { + ret = -EAGAIN; + if (ehca_debug_level >= 3) + ehca_dbg(cq->device, "Completion queue is empty " + "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number); + goto poll_cq_one_exit0; + } + + /* prevents loads being reordered across this point */ + rmb(); + + cqe_count++; + if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { + struct ehca_qp *qp; + int purgeflag; + unsigned long flags; + + qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number); + if (!qp) { + ehca_err(cq->device, "cq_num=%x qp_num=%x " + "could not find qp -> ignore cqe", + my_cq->cq_number, cqe->local_qp_number); + ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", + my_cq->cq_number, cqe->local_qp_number); + /* ignore this purged cqe */ + goto repoll; + } + spin_lock_irqsave(&qp->spinlock_s, flags); + purgeflag = qp->sqerr_purgeflag; + spin_unlock_irqrestore(&qp->spinlock_s, flags); + + if (purgeflag) { + ehca_dbg(cq->device, + "Got CQE with purged bit qp_num=%x src_qp=%x", + cqe->local_qp_number, cqe->remote_qp_number); + if (ehca_debug_level >= 2) + ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", + cqe->local_qp_number, + cqe->remote_qp_number); + /* + * ignore this to avoid double cqes of bad wqe + * that caused sqe and turn off purge flag + */ + qp->sqerr_purgeflag = 0; + goto repoll; + } + } + + is_error = cqe->status & WC_STATUS_ERROR_BIT; + + /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */ + if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) { + ehca_dbg(cq->device, + "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----", + is_error ? "ERROR " : "", my_cq, my_cq->cq_number); + ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + ehca_dbg(cq->device, + "ehca_cq=%p cq_num=%x -------------------------", + my_cq, my_cq->cq_number); + } + + read_lock(&ehca_qp_idr_lock); + my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); + read_unlock(&ehca_qp_idr_lock); + if (!my_qp) + goto repoll; + wc->qp = &my_qp->ib_qp; + + qmap_tail_idx = get_app_wr_id(cqe->work_request_id); + if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) + /* We got a send completion. */ + qmap = &my_qp->sq_map; + else + /* We got a receive completion. */ + qmap = &my_qp->rq_map; + + /* advance the tail pointer */ + qmap->tail = qmap_tail_idx; + + if (is_error) { + /* + * set left_to_poll to 0 because in error state, we will not + * get any additional CQEs + */ + my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, + my_qp->sq_map.entries); + my_qp->sq_map.left_to_poll = 0; + ehca_add_to_err_list(my_qp, 1); + + my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, + my_qp->rq_map.entries); + my_qp->rq_map.left_to_poll = 0; + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + + qmap_entry = &qmap->map[qmap_tail_idx]; + if (qmap_entry->reported) { + ehca_warn(cq->device, "Double cqe on qp_num=%#x", + my_qp->real_qp_num); + /* found a double cqe, discard it and read next one */ + goto repoll; + } + + wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); + qmap_entry->reported = 1; + + /* if left_to_poll is decremented to 0, add the QP to the error list */ + if (qmap->left_to_poll > 0) { + qmap->left_to_poll--; + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + ehca_add_to_err_list(my_qp, 1); + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + } + + /* eval ib_wc_opcode */ + wc->opcode = ib_wc_opcode[cqe->optype]-1; + if (unlikely(wc->opcode == -1)) { + ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x " + "ehca_cq=%p cq_num=%x", + cqe->optype, cqe->status, my_cq, my_cq->cq_number); + /* dump cqe for other infos */ + ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + /* update also queue adder to throw away this entry!!! */ + goto repoll; + } + + /* eval ib_wc_status */ + if (unlikely(is_error)) { + /* complete with errors */ + map_ib_wc_status(cqe->status, &wc->status); + wc->vendor_err = wc->status; + } else + wc->status = IB_WC_SUCCESS; + + wc->byte_len = cqe->nr_bytes_transferred; + wc->pkey_index = cqe->pkey_index; + wc->slid = cqe->rlid; + wc->dlid_path_bits = cqe->dlid; + wc->src_qp = cqe->remote_qp_number; + /* + * HW has "Immed data present" and "GRH present" in bits 6 and 5. + * SW defines those in bits 1 and 0, so we can just shift and mask. + */ + wc->wc_flags = (cqe->w_completion_flags >> 5) & 3; + wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); + wc->sl = cqe->service_level; + +poll_cq_one_exit0: + if (cqe_count > 0) + hipz_update_feca(my_cq, cqe_count); + + return ret; +} + +static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, + struct ib_wc *wc, int num_entries, + struct ipz_queue *ipz_queue, int on_sq) +{ + int nr = 0; + struct ehca_wqe *wqe; + u64 offset; + struct ehca_queue_map *qmap; + struct ehca_qmap_entry *qmap_entry; + + if (on_sq) + qmap = &my_qp->sq_map; + else + qmap = &my_qp->rq_map; + + qmap_entry = &qmap->map[qmap->next_wqe_idx]; + + while ((nr < num_entries) && (qmap_entry->reported == 0)) { + /* generate flush CQE */ + + memset(wc, 0, sizeof(*wc)); + + offset = qmap->next_wqe_idx * ipz_queue->qe_size; + wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); + if (!wqe) { + ehca_err(cq->device, "Invalid wqe offset=%#llx on " + "qp_num=%#x", offset, my_qp->real_qp_num); + return nr; + } + + wc->wr_id = replace_wr_id(wqe->work_request_id, + qmap_entry->app_wr_id); + + if (on_sq) { + switch (wqe->optype) { + case WQE_OPTYPE_SEND: + wc->opcode = IB_WC_SEND; + break; + case WQE_OPTYPE_RDMAWRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case WQE_OPTYPE_RDMAREAD: + wc->opcode = IB_WC_RDMA_READ; + break; + default: + ehca_err(cq->device, "Invalid optype=%x", + wqe->optype); + return nr; + } + } else + wc->opcode = IB_WC_RECV; + + if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { + wc->ex.imm_data = wqe->immediate_data; + wc->wc_flags |= IB_WC_WITH_IMM; + } + + wc->status = IB_WC_WR_FLUSH_ERR; + + wc->qp = &my_qp->ib_qp; + + /* mark as reported and advance next_wqe pointer */ + qmap_entry->reported = 1; + qmap->next_wqe_idx = next_index(qmap->next_wqe_idx, + qmap->entries); + qmap_entry = &qmap->map[qmap->next_wqe_idx]; + + wc++; nr++; + } + + return nr; + +} + +int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) +{ + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + int nr; + struct ehca_qp *err_qp; + struct ib_wc *current_wc = wc; + int ret = 0; + unsigned long flags; + int entries_left = num_entries; + + if (num_entries < 1) { + ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " + "cq_num=%x", num_entries, my_cq, my_cq->cq_number); + ret = -EINVAL; + goto poll_cq_exit0; + } + + spin_lock_irqsave(&my_cq->spinlock, flags); + + /* generate flush cqes for send queues */ + list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_squeue, 1); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + /* generate flush cqes for receive queues */ + list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_rqueue, 0); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + for (nr = 0; nr < entries_left; nr++) { + ret = ehca_poll_cq_one(cq, current_wc); + if (ret) + break; + current_wc++; + } /* eof for nr */ + entries_left -= nr; + + spin_unlock_irqrestore(&my_cq->spinlock, flags); + if (ret == -EAGAIN || !ret) + ret = num_entries - entries_left; + +poll_cq_exit0: + return ret; +} + +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) +{ + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + int ret = 0; + + switch (notify_flags & IB_CQ_SOLICITED_MASK) { + case IB_CQ_SOLICITED: + hipz_set_cqx_n0(my_cq, 1); + break; + case IB_CQ_NEXT_COMP: + hipz_set_cqx_n1(my_cq, 1); + break; + default: + return -EINVAL; + } + + if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + unsigned long spl_flags; + spin_lock_irqsave(&my_cq->spinlock, spl_flags); + ret = ipz_qeit_is_valid(&my_cq->ipz_queue); + spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); + } + + return ret; +} diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c new file mode 100644 index 00000000..dba8f9f8 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -0,0 +1,237 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * SQP functions + * + * Authors: Khadija Souissi + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +#define IB_MAD_STATUS_REDIRECT cpu_to_be16(0x0002) +#define IB_MAD_STATUS_UNSUP_VERSION cpu_to_be16(0x0004) +#define IB_MAD_STATUS_UNSUP_METHOD cpu_to_be16(0x0008) + +#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) + +/** + * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue + * pair is created successfully, the corresponding port gets active. + * + * Define Special Queue pair 0 (SMI QP) is still not supported. + * + * @qp_init_attr: Queue pair init attributes with port and queue pair type + */ + +u64 ehca_define_sqp(struct ehca_shca *shca, + struct ehca_qp *ehca_qp, + struct ib_qp_init_attr *qp_init_attr) +{ + u32 pma_qp_nr, bma_qp_nr; + u64 ret; + u8 port = qp_init_attr->port_num; + int counter; + + shca->sport[port - 1].port_state = IB_PORT_DOWN; + + switch (qp_init_attr->qp_type) { + case IB_QPT_SMI: + /* function not supported yet */ + break; + case IB_QPT_GSI: + ret = hipz_h_define_aqp1(shca->ipz_hca_handle, + ehca_qp->ipz_qp_handle, + ehca_qp->galpas.kernel, + (u32) qp_init_attr->port_num, + &pma_qp_nr, &bma_qp_nr); + + if (ret != H_SUCCESS) { + ehca_err(&shca->ib_device, + "Can't define AQP1 for port %x. h_ret=%lli", + port, ret); + return ret; + } + shca->sport[port - 1].pma_qp_nr = pma_qp_nr; + ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x", + port, pma_qp_nr); + break; + default: + ehca_err(&shca->ib_device, "invalid qp_type=%x", + qp_init_attr->qp_type); + return H_PARAMETER; + } + + if (ehca_nr_ports < 0) /* autodetect mode */ + return H_SUCCESS; + + for (counter = 0; + shca->sport[port - 1].port_state != IB_PORT_ACTIVE && + counter < ehca_port_act_time; + counter++) { + ehca_dbg(&shca->ib_device, "... wait until port %x is active", + port); + msleep_interruptible(1000); + } + + if (counter == ehca_port_act_time) { + ehca_err(&shca->ib_device, "Port %x is not active.", port); + return H_HARDWARE; + } + + return H_SUCCESS; +} + +struct ib_perf { + struct ib_mad_hdr mad_hdr; + u8 reserved[40]; + u8 data[192]; +} __attribute__ ((packed)); + +/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */ +struct tcslfl { + u32 tc:8; + u32 sl:4; + u32 fl:20; +} __attribute__ ((packed)); + +/* IP Version/TC/FL packed into 32 bits, as in GRH */ +struct vertcfl { + u32 ver:4; + u32 tc:8; + u32 fl:20; +} __attribute__ ((packed)); + +static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + struct ib_perf *in_perf = (struct ib_perf *)in_mad; + struct ib_perf *out_perf = (struct ib_perf *)out_mad; + struct ib_class_port_info *poi = + (struct ib_class_port_info *)out_perf->data; + struct tcslfl *tcslfl = + (struct tcslfl *)&poi->redirect_tcslfl; + struct ehca_shca *shca = + container_of(ibdev, struct ehca_shca, ib_device); + struct ehca_sport *sport = &shca->sport[port_num - 1]; + + ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method); + + *out_mad = *in_mad; + + if (in_perf->mad_hdr.class_version != 1) { + ehca_warn(ibdev, "Unsupported class_version=%x", + in_perf->mad_hdr.class_version); + out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION; + goto perf_reply; + } + + switch (in_perf->mad_hdr.method) { + case IB_MGMT_METHOD_GET: + case IB_MGMT_METHOD_SET: + /* set class port info for redirection */ + out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO; + out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT; + memset(poi, 0, sizeof(*poi)); + poi->base_version = 1; + poi->class_version = 1; + poi->resp_time_value = 18; + + /* copy local routing information from WC where applicable */ + tcslfl->sl = in_wc->sl; + poi->redirect_lid = + sport->saved_attr.lid | in_wc->dlid_path_bits; + poi->redirect_qp = sport->pma_qp_nr; + poi->redirect_qkey = IB_QP1_QKEY; + + ehca_query_pkey(ibdev, port_num, in_wc->pkey_index, + &poi->redirect_pkey); + + /* if request was globally routed, copy route info */ + if (in_grh) { + struct vertcfl *vertcfl = + (struct vertcfl *)&in_grh->version_tclass_flow; + memcpy(poi->redirect_gid, in_grh->dgid.raw, + sizeof(poi->redirect_gid)); + tcslfl->tc = vertcfl->tc; + tcslfl->fl = vertcfl->fl; + } else + /* else only fill in default GID */ + ehca_query_gid(ibdev, port_num, 0, + (union ib_gid *)&poi->redirect_gid); + + ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x", + sport->saved_attr.lid, sport->pma_qp_nr); + break; + + case IB_MGMT_METHOD_GET_RESP: + return IB_MAD_RESULT_FAILURE; + + default: + out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD; + break; + } + +perf_reply: + out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + int ret; + + if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) + return IB_MAD_RESULT_FAILURE; + + /* accept only pma request */ + if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) + return IB_MAD_RESULT_SUCCESS; + + ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp); + ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh, + in_mad, out_mad); + + return ret; +} diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h new file mode 100644 index 00000000..54c0d23b --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -0,0 +1,156 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * auxiliary functions + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Khadija Souissi + * Waleri Fomin + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef EHCA_TOOLS_H +#define EHCA_TOOLS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +extern int ehca_debug_level; + +#define ehca_dbg(ib_dev, format, arg...) \ + do { \ + if (unlikely(ehca_debug_level)) \ + dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \ + "PU%04x EHCA_DBG:%s " format "\n", \ + raw_smp_processor_id(), __func__, \ + ## arg); \ + } while (0) + +#define ehca_info(ib_dev, format, arg...) \ + dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +#define ehca_warn(ib_dev, format, arg...) \ + dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +#define ehca_err(ib_dev, format, arg...) \ + dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +/* use this one only if no ib_dev available */ +#define ehca_gen_dbg(format, arg...) \ + do { \ + if (unlikely(ehca_debug_level)) \ + printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg); \ + } while (0) + +#define ehca_gen_warn(format, arg...) \ + printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +#define ehca_gen_err(format, arg...) \ + printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +/** + * ehca_dmp - printk a memory block, whose length is n*8 bytes. + * Each line has the following layout: + * adr=X ofs=Y <8 bytes hex> <8 bytes hex> + */ +#define ehca_dmp(adr, len, format, args...) \ + do { \ + unsigned int x; \ + unsigned int l = (unsigned int)(len); \ + unsigned char *deb = (unsigned char *)(adr); \ + for (x = 0; x < l; x += 16) { \ + printk(KERN_INFO "EHCA_DMP:%s " format \ + " adr=%p ofs=%04x %016llx %016llx\n", \ + __func__, ##args, deb, x, \ + *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ + deb += 16; \ + } \ + } while (0) + +/* define a bitmask, little endian version */ +#define EHCA_BMASK(pos, length) (((pos) << 16) + (length)) + +/* define a bitmask, the ibm way... */ +#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1)) + +/* internal function, don't use */ +#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff) + +/* internal function, don't use */ +#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff)) + +/** + * EHCA_BMASK_SET - return value shifted and masked by mask + * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable + * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask + * in variable + */ +#define EHCA_BMASK_SET(mask, value) \ + ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask)) + +/** + * EHCA_BMASK_GET - extract a parameter from value by mask + */ +#define EHCA_BMASK_GET(mask, value) \ + (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask))) + +/* Converts ehca to ib return code */ +int ehca2ib_return_code(u64 ehca_rc); + +#endif /* EHCA_TOOLS_H */ diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c new file mode 100644 index 00000000..45ee89b6 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -0,0 +1,309 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * userspace support verbs + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_classes.h" +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "ehca_tools.h" +#include "hcp_if.h" + +struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, + struct ib_udata *udata) +{ + struct ehca_ucontext *my_context; + + my_context = kzalloc(sizeof *my_context, GFP_KERNEL); + if (!my_context) { + ehca_err(device, "Out of memory device=%p", device); + return ERR_PTR(-ENOMEM); + } + + return &my_context->ib_ucontext; +} + +int ehca_dealloc_ucontext(struct ib_ucontext *context) +{ + kfree(container_of(context, struct ehca_ucontext, ib_ucontext)); + return 0; +} + +static void ehca_mm_open(struct vm_area_struct *vma) +{ + u32 *count = (u32 *)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; + } + (*count)++; + if (!(*count)) + ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); +} + +static void ehca_mm_close(struct vm_area_struct *vma) +{ + u32 *count = (u32 *)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; + } + (*count)--; + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); +} + +static const struct vm_operations_struct vm_ops = { + .open = ehca_mm_open, + .close = ehca_mm_close, +}; + +static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, + u32 *mm_count) +{ + int ret; + u64 vsize, physical; + + vsize = vma->vm_end - vma->vm_start; + if (vsize < EHCA_PAGESIZE) { + ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start); + return -EINVAL; + } + + physical = galpas->user.fw_handle; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical); + /* VM_IO | VM_RESERVED are set by remap_pfn_range() */ + ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT, + vma->vm_page_prot); + if (unlikely(ret)) { + ehca_gen_err("remap_pfn_range() failed ret=%i", ret); + return -ENOMEM; + } + + vma->vm_private_data = mm_count; + (*mm_count)++; + vma->vm_ops = &vm_ops; + + return 0; +} + +static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, + u32 *mm_count) +{ + int ret; + u64 start, ofs; + struct page *page; + + vma->vm_flags |= VM_RESERVED; + start = vma->vm_start; + for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { + u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); + page = virt_to_page(virt_addr); + ret = vm_insert_page(vma, start, page); + if (unlikely(ret)) { + ehca_gen_err("vm_insert_page() failed rc=%i", ret); + return ret; + } + start += PAGE_SIZE; + } + vma->vm_private_data = mm_count; + (*mm_count)++; + vma->vm_ops = &vm_ops; + + return 0; +} + +static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq, + u32 rsrc_type) +{ + int ret; + + switch (rsrc_type) { + case 0: /* galpa fw handle */ + ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number); + ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_fw() failed rc=%i cq_num=%x", + ret, cq->cq_number); + return ret; + } + break; + + case 1: /* cq queue_addr */ + ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number); + ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_queue() failed rc=%i cq_num=%x", + ret, cq->cq_number); + return ret; + } + break; + + default: + ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x", + rsrc_type, cq->cq_number); + return -EINVAL; + } + + return 0; +} + +static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, + u32 rsrc_type) +{ + int ret; + + switch (rsrc_type) { + case 0: /* galpa fw handle */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num); + ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "remap_pfn_range() failed ret=%i qp_num=%x", + ret, qp->ib_qp.qp_num); + return -ENOMEM; + } + break; + + case 1: /* qp rqueue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num); + ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, + &qp->mm_count_rqueue); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_queue(rq) failed rc=%i qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; + } + break; + + case 2: /* qp squeue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num); + ret = ehca_mmap_queue(vma, &qp->ipz_squeue, + &qp->mm_count_squeue); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_queue(sq) failed rc=%i qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; + } + break; + + default: + ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x", + rsrc_type, qp->ib_qp.qp_num); + return -EINVAL; + } + + return 0; +} + +int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + u64 fileoffset = vma->vm_pgoff; + u32 idr_handle = fileoffset & 0x1FFFFFF; + u32 q_type = (fileoffset >> 27) & 0x1; /* CQ, QP,... */ + u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */ + u32 ret; + struct ehca_cq *cq; + struct ehca_qp *qp; + struct ib_uobject *uobject; + + switch (q_type) { + case 0: /* CQ */ + read_lock(&ehca_cq_idr_lock); + cq = idr_find(&ehca_cq_idr, idr_handle); + read_unlock(&ehca_cq_idr_lock); + + /* make sure this mmap really belongs to the authorized user */ + if (!cq) + return -EINVAL; + + if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context) + return -EINVAL; + + ret = ehca_mmap_cq(vma, cq, rsrc_type); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_cq() failed rc=%i cq_num=%x", + ret, cq->cq_number); + return ret; + } + break; + + case 1: /* QP */ + read_lock(&ehca_qp_idr_lock); + qp = idr_find(&ehca_qp_idr, idr_handle); + read_unlock(&ehca_qp_idr_lock); + + /* make sure this mmap really belongs to the authorized user */ + if (!qp) + return -EINVAL; + + uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject; + if (!uobject || uobject->context != context) + return -EINVAL; + + ret = ehca_mmap_qp(vma, qp, rsrc_type); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_qp() failed rc=%i qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; + } + break; + + default: + ehca_gen_err("bad queue type %x", q_type); + return -EINVAL; + } + + return 0; +} diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c new file mode 100644 index 00000000..e6f9cdd9 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -0,0 +1,969 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware Infiniband Interface code for POWER + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Joachim Fenkes + * Gerd Bayer + * Waleri Fomin + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "ehca_tools.h" +#include "hcp_if.h" +#include "hcp_phyp.h" +#include "hipz_fns.h" +#include "ipz_pt_fn.h" + +#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11) +#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12) +#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15) +#define H_ALL_RES_QP_STORAGE EHCA_BMASK_IBM(16, 17) +#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18) +#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21) +#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23) +#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31) +#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35) +#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39) +#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63) + +#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15) +#define H_ALL_RES_QP_MAX_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) +#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39) +#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47) + +#define H_ALL_RES_QP_UD_AV_LKEY EHCA_BMASK_IBM(32, 63) +#define H_ALL_RES_QP_SRQ_QP_TOKEN EHCA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_SRQ_QP_HANDLE EHCA_BMASK_IBM(0, 64) +#define H_ALL_RES_QP_SRQ_LIMIT EHCA_BMASK_IBM(48, 63) +#define H_ALL_RES_QP_SRQ_QPN EHCA_BMASK_IBM(40, 63) + +#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) +#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63) +#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15) +#define H_ALL_RES_QP_ACT_RECV_SGE EHCA_BMASK_IBM(24, 31) + +#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63) + +#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47) +#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) +#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) + +#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx" +#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx" +#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx" + +static DEFINE_SPINLOCK(hcall_lock); + +static u32 get_longbusy_msecs(int longbusy_rc) +{ + switch (longbusy_rc) { + case H_LONG_BUSY_ORDER_1_MSEC: + return 1; + case H_LONG_BUSY_ORDER_10_MSEC: + return 10; + case H_LONG_BUSY_ORDER_100_MSEC: + return 100; + case H_LONG_BUSY_ORDER_1_SEC: + return 1000; + case H_LONG_BUSY_ORDER_10_SEC: + return 10000; + case H_LONG_BUSY_ORDER_100_SEC: + return 100000; + default: + return 1; + } +} + +static long ehca_plpar_hcall_norets(unsigned long opcode, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7) +{ + long ret; + int i, sleep_msecs; + unsigned long flags = 0; + + if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT, + opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); + + for (i = 0; i < 5; i++) { + /* serialize hCalls to work around firmware issue */ + if (ehca_lock_hcalls) + spin_lock_irqsave(&hcall_lock, flags); + + ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, + arg5, arg6, arg7); + + if (ehca_lock_hcalls) + spin_unlock_irqrestore(&hcall_lock, flags); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + if (ret < H_SUCCESS) + ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT, + opcode, ret, arg1, arg2, arg3, + arg4, arg5, arg6, arg7); + else + if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret); + + return ret; + } + + return H_BUSY; +} + +static long ehca_plpar_hcall9(unsigned long opcode, + unsigned long *outs, /* array of 9 outputs */ + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7, + unsigned long arg8, + unsigned long arg9) +{ + long ret; + int i, sleep_msecs; + unsigned long flags = 0; + + if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); + + for (i = 0; i < 5; i++) { + /* serialize hCalls to work around firmware issue */ + if (ehca_lock_hcalls) + spin_lock_irqsave(&hcall_lock, flags); + + ret = plpar_hcall9(opcode, outs, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); + + if (ehca_lock_hcalls) + spin_unlock_irqrestore(&hcall_lock, flags); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + if (ret < H_SUCCESS) { + ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, + opcode, arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); + ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, + ret, outs[0], outs[1], outs[2], outs[3], + outs[4], outs[5], outs[6], outs[7], + outs[8]); + } else if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, + ret, outs[0], outs[1], outs[2], outs[3], + outs[4], outs[5], outs[6], outs[7], + outs[8]); + return ret; + } + + return H_BUSY; +} + +u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_pfeq *pfeq, + const u32 neq_control, + const u32 number_of_entries, + struct ipz_eq_handle *eq_handle, + u32 *act_nr_of_entries, + u32 *act_pages, + u32 *eq_ist) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + u64 allocate_controls; + + /* resource type */ + allocate_controls = 3ULL; + + /* ISN is associated */ + if (neq_control != 1) + allocate_controls = (1ULL << (63 - 7)) | allocate_controls; + else /* notification event queue */ + allocate_controls = (1ULL << 63) | allocate_controls; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + allocate_controls, /* r5 */ + number_of_entries, /* r6 */ + 0, 0, 0, 0, 0, 0); + eq_handle->handle = outs[0]; + *act_nr_of_entries = (u32)outs[3]; + *act_pages = (u32)outs[4]; + *eq_ist = (u32)outs[5]; + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resource - ret=%lli ", ret); + + return ret; +} + +u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, + struct ipz_eq_handle eq_handle, + const u64 event_mask) +{ + return ehca_plpar_hcall_norets(H_RESET_EVENTS, + adapter_handle.handle, /* r4 */ + eq_handle.handle, /* r5 */ + event_mask, /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + struct ehca_alloc_cq_parms *param) +{ + int rc; + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + 2, /* r5 */ + param->eq_handle.handle, /* r6 */ + cq->token, /* r7 */ + param->nr_cqe, /* r8 */ + 0, 0, 0, 0); + cq->ipz_cq_handle.handle = outs[0]; + param->act_nr_of_entries = (u32)outs[3]; + param->act_pages = (u32)outs[4]; + + if (ret == H_SUCCESS) { + rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]); + if (rc) { + ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", + rc, outs[5]); + + ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + cq->ipz_cq_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + ret = H_NO_MEM; + } + } + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resources. ret=%lli", ret); + + return ret; +} + +u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_alloc_qp_parms *parms, int is_user) +{ + int rc; + u64 ret; + u64 allocate_controls, max_r10_reg, r11, r12; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + allocate_controls = + EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type) + | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) + | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) + | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) + | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage) + | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE, + parms->squeue.page_size) + | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE, + parms->rqueue.page_size) + | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, + !!(parms->ll_comp_flags & LLQP_RECV_COMP)) + | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, + !!(parms->ll_comp_flags & LLQP_SEND_COMP)) + | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL, + parms->ud_av_l_key_ctl) + | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1); + + max_r10_reg = + EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, + parms->squeue.max_wr + 1) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, + parms->rqueue.max_wr + 1) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, + parms->squeue.max_sge) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, + parms->rqueue.max_sge); + + r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token); + + if (parms->ext_type == EQPT_SRQ) + r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit); + else + r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn); + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + allocate_controls, /* r5 */ + parms->send_cq_handle.handle, + parms->recv_cq_handle.handle, + parms->eq_handle.handle, + ((u64)parms->token << 32) | parms->pd.value, + max_r10_reg, r11, r12); + + parms->qp_handle.handle = outs[0]; + parms->real_qp_num = (u32)outs[1]; + parms->squeue.act_nr_wqes = + (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); + parms->rqueue.act_nr_wqes = + (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]); + parms->squeue.act_nr_sges = + (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]); + parms->rqueue.act_nr_sges = + (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]); + parms->squeue.queue_size = + (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]); + parms->rqueue.queue_size = + (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); + + if (ret == H_SUCCESS) { + rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]); + if (rc) { + ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", + rc, outs[6]); + + ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + parms->qp_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + ret = H_NO_MEM; + } + } + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resources. ret=%lli", ret); + + return ret; +} + +u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, + struct hipz_query_port *query_port_response_block) +{ + u64 ret; + u64 r_cb = virt_to_abs(query_port_response_block); + + if (r_cb & (EHCA_PAGESIZE-1)) { + ehca_gen_err("response block not page aligned"); + return H_PARAMETER; + } + + ret = ehca_plpar_hcall_norets(H_QUERY_PORT, + adapter_handle.handle, /* r4 */ + port_id, /* r5 */ + r_cb, /* r6 */ + 0, 0, 0, 0); + + if (ehca_debug_level >= 2) + ehca_dmp(query_port_response_block, 64, "response_block"); + + return ret; +} + +u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, const u32 port_cap, + const u8 init_type, const int modify_mask) +{ + u64 port_attributes = port_cap; + + if (modify_mask & IB_PORT_SHUTDOWN) + port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1); + if (modify_mask & IB_PORT_INIT_TYPE) + port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type); + if (modify_mask & IB_PORT_RESET_QKEY_CNTR) + port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1); + + return ehca_plpar_hcall_norets(H_MODIFY_PORT, + adapter_handle.handle, /* r4 */ + port_id, /* r5 */ + port_attributes, /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, + struct hipz_query_hca *query_hca_rblock) +{ + u64 r_cb = virt_to_abs(query_hca_rblock); + + if (r_cb & (EHCA_PAGESIZE-1)) { + ehca_gen_err("response_block=%p not page aligned", + query_hca_rblock); + return H_PARAMETER; + } + + return ehca_plpar_hcall_norets(H_QUERY_HCA, + adapter_handle.handle, /* r4 */ + r_cb, /* r5 */ + 0, 0, 0, 0, 0); +} + +u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, + const u8 pagesize, + const u8 queue_type, + const u64 resource_handle, + const u64 logical_address_of_page, + u64 count) +{ + return ehca_plpar_hcall_norets(H_REGISTER_RPAGES, + adapter_handle.handle, /* r4 */ + (u64)queue_type | ((u64)pagesize) << 8, + /* r5 */ + resource_handle, /* r6 */ + logical_address_of_page, /* r7 */ + count, /* r8 */ + 0, 0); +} + +u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_eq_handle eq_handle, + struct ehca_pfeq *pfeq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count) +{ + if (count != 1) { + ehca_gen_err("Ppage counter=%llx", count); + return H_PARAMETER; + } + return hipz_h_register_rpage(adapter_handle, + pagesize, + queue_type, + eq_handle.handle, + logical_address_of_page, count); +} + +u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle, + u32 ist) +{ + u64 ret; + ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE, + adapter_handle.handle, /* r4 */ + ist, /* r5 */ + 0, 0, 0, 0, 0); + + if (ret != H_SUCCESS && ret != H_BUSY) + ehca_gen_err("Could not query interrupt state."); + + return ret; +} + +u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_cq_handle cq_handle, + struct ehca_pfcq *pfcq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa gal) +{ + if (count != 1) { + ehca_gen_err("Page counter=%llx", count); + return H_PARAMETER; + } + + return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, + cq_handle.handle, logical_address_of_page, + count); +} + +u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa galpa) +{ + if (count > 1) { + ehca_gen_err("Page counter=%llx", count); + return H_PARAMETER; + } + + return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, + qp_handle.handle, logical_address_of_page, + count); +} + +u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + void **log_addr_next_sq_wqe2processed, + void **log_addr_next_rq_wqe2processed, + int dis_and_get_function_code) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, + adapter_handle.handle, /* r4 */ + dis_and_get_function_code, /* r5 */ + qp_handle.handle, /* r6 */ + 0, 0, 0, 0, 0, 0); + if (log_addr_next_sq_wqe2processed) + *log_addr_next_sq_wqe2processed = (void *)outs[0]; + if (log_addr_next_rq_wqe2processed) + *log_addr_next_rq_wqe2processed = (void *)outs[1]; + + return ret; +} + +u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u64 update_mask, + struct hcp_modify_qp_control_block *mqpcb, + struct h_galpa gal) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + ret = ehca_plpar_hcall9(H_MODIFY_QP, outs, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + update_mask, /* r6 */ + virt_to_abs(mqpcb), /* r7 */ + 0, 0, 0, 0, 0); + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Insufficient resources ret=%lli", ret); + + return ret; +} + +u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + struct hcp_modify_qp_control_block *qqpcb, + struct h_galpa gal) +{ + return ehca_plpar_hcall_norets(H_QUERY_QP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + virt_to_abs(qqpcb), /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = hcp_galpas_dtor(&qp->galpas); + if (ret) { + ehca_gen_err("Could not destruct qp->galpas"); + return H_RESOURCE; + } + ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, + adapter_handle.handle, /* r4 */ + /* function code */ + 1, /* r5 */ + qp->ipz_qp_handle.handle, /* r6 */ + 0, 0, 0, 0, 0, 0); + if (ret == H_HARDWARE) + ehca_gen_err("HCA not operational. ret=%lli", ret); + + ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + qp->ipz_qp_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + + if (ret == H_RESOURCE) + ehca_gen_err("Resource still in use. ret=%lli", ret); + + return ret; +} + +u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port) +{ + return ehca_plpar_hcall_norets(H_DEFINE_AQP0, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + port, /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port, u32 * pma_qp_nr, + u32 * bma_qp_nr) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + port, /* r6 */ + 0, 0, 0, 0, 0, 0); + *pma_qp_nr = (u32)outs[0]; + *bma_qp_nr = (u32)outs[1]; + + if (ret == H_ALIAS_EXIST) + ehca_gen_err("AQP1 already exists. ret=%lli", ret); + + return ret; +} + +u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id) +{ + u64 ret; + + ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + mcg_dlid, /* r6 */ + interface_id, /* r7 */ + subnet_prefix, /* r8 */ + 0, 0); + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resources. ret=%lli", ret); + + return ret; +} + +u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id) +{ + return ehca_plpar_hcall_norets(H_DETACH_MCQP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + mcg_dlid, /* r6 */ + interface_id, /* r7 */ + subnet_prefix, /* r8 */ + 0, 0); +} + +u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + u8 force_flag) +{ + u64 ret; + + ret = hcp_galpas_dtor(&cq->galpas); + if (ret) { + ehca_gen_err("Could not destruct cp->galpas"); + return H_RESOURCE; + } + + ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + cq->ipz_cq_handle.handle, /* r5 */ + force_flag != 0 ? 1L : 0L, /* r6 */ + 0, 0, 0, 0); + + if (ret == H_RESOURCE) + ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret); + + return ret; +} + +u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_eq *eq) +{ + u64 ret; + + ret = hcp_galpas_dtor(&eq->galpas); + if (ret) { + ehca_gen_err("Could not destruct eq->galpas"); + return H_RESOURCE; + } + + ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + eq->ipz_eq_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + + if (ret == H_RESOURCE) + ehca_gen_err("Resource in use. ret=%lli ", ret); + + return ret; +} + +u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + 5, /* r5 */ + vaddr, /* r6 */ + length, /* r7 */ + (((u64)access_ctrl) << 32ULL), /* r8 */ + pd.value, /* r9 */ + 0, 0, 0); + outparms->handle.handle = outs[0]; + outparms->lkey = (u32)outs[2]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count) +{ + u64 ret; + + if (unlikely(ehca_debug_level >= 3)) { + if (count > 1) { + u64 *kpage; + int i; + kpage = (u64 *)abs_to_virt(logical_address_of_page); + for (i = 0; i < count; i++) + ehca_gen_dbg("kpage[%d]=%p", + i, (void *)kpage[i]); + } else + ehca_gen_dbg("kpage=%p", + (void *)logical_address_of_page); + } + + if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) { + ehca_gen_err("logical_address_of_page not on a 4k boundary " + "adapter_handle=%llx mr=%p mr_handle=%llx " + "pagesize=%x queue_type=%x " + "logical_address_of_page=%llx count=%llx", + adapter_handle.handle, mr, + mr->ipz_mr_handle.handle, pagesize, queue_type, + logical_address_of_page, count); + ret = H_PARAMETER; + } else + ret = hipz_h_register_rpage(adapter_handle, pagesize, + queue_type, + mr->ipz_mr_handle.handle, + logical_address_of_page, count); + return ret; +} + +u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_QUERY_MR, outs, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, 0, 0); + outparms->len = outs[0]; + outparms->vaddr = outs[1]; + outparms->acl = outs[4] >> 32; + outparms->lkey = (u32)(outs[5] >> 32); + outparms->rkey = (u32)(outs[5] & (0xffffffff)); + + return ret; +} + +u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr) +{ + return ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); +} + +u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr_in, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + const u64 mr_addr_cb, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + vaddr_in, /* r6 */ + length, /* r7 */ + /* r8 */ + ((((u64)access_ctrl) << 32ULL) | pd.value), + mr_addr_cb, /* r9 */ + 0, 0, 0); + outparms->vaddr = outs[1]; + outparms->lkey = (u32)outs[2]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const struct ehca_mr *orig_mr, + const u64 vaddr_in, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs, + adapter_handle.handle, /* r4 */ + orig_mr->ipz_mr_handle.handle, /* r5 */ + vaddr_in, /* r6 */ + (((u64)access_ctrl) << 32ULL), /* r7 */ + pd.value, /* r8 */ + 0, 0, 0, 0); + outparms->handle.handle = outs[0]; + outparms->lkey = (u32)outs[2]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + const struct ipz_pd pd, + struct ehca_mw_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + 6, /* r5 */ + pd.value, /* r6 */ + 0, 0, 0, 0, 0, 0); + outparms->handle.handle = outs[0]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + struct ehca_mw_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_QUERY_MW, outs, + adapter_handle.handle, /* r4 */ + mw->ipz_mw_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, 0, 0); + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw) +{ + return ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + mw->ipz_mw_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); +} + +u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, + const u64 ressource_handle, + void *rblock, + unsigned long *byte_count) +{ + u64 r_cb = virt_to_abs(rblock); + + if (r_cb & (EHCA_PAGESIZE-1)) { + ehca_gen_err("rblock not page aligned."); + return H_PARAMETER; + } + + return ehca_plpar_hcall_norets(H_ERROR_DATA, + adapter_handle.handle, + ressource_handle, + r_cb, + 0, 0, 0, 0); +} + +u64 hipz_h_eoi(int irq) +{ + unsigned long xirr; + + iosync(); + xirr = (0xffULL << 24) | irq; + + return plpar_hcall_norets(H_EOI, xirr); +} diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h new file mode 100644 index 00000000..a46e514c --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -0,0 +1,265 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware Infiniband Interface code for POWER + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Gerd Bayer + * Waleri Fomin + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HCP_IF_H__ +#define __HCP_IF_H__ + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "hipz_hw.h" + +/* + * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize + * resources, create the empty EQPT (ring). + */ +u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_pfeq *pfeq, + const u32 neq_control, + const u32 number_of_entries, + struct ipz_eq_handle *eq_handle, + u32 * act_nr_of_entries, + u32 * act_pages, + u32 * eq_ist); + +u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, + struct ipz_eq_handle eq_handle, + const u64 event_mask); +/* + * hipz_h_allocate_resource_cq allocates CQ resources in HW and FW, initialize + * resources, create the empty CQPT (ring). + */ +u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + struct ehca_alloc_cq_parms *param); + + +/* + * hipz_h_alloc_resource_qp allocates QP resources in HW and FW, + * initialize resources, create empty QPPTs (2 rings). + */ +u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_alloc_qp_parms *parms, int is_user); + +u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, + struct hipz_query_port *query_port_response_block); + +u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, const u32 port_cap, + const u8 init_type, const int modify_mask); + +u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, + struct hipz_query_hca *query_hca_rblock); + +/* + * hipz_h_register_rpage internal function in hcp_if.h for all + * hcp_H_REGISTER_RPAGE calls. + */ +u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, + const u8 pagesize, + const u8 queue_type, + const u64 resource_handle, + const u64 logical_address_of_page, + u64 count); + +u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_eq_handle eq_handle, + struct ehca_pfeq *pfeq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count); + +u64 hipz_h_query_int_state(const struct ipz_adapter_handle + hcp_adapter_handle, + u32 ist); + +u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_cq_handle cq_handle, + struct ehca_pfcq *pfcq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa gal); + +u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa galpa); + +u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + void **log_addr_next_sq_wqe_tb_processed, + void **log_addr_next_rq_wqe_tb_processed, + int dis_and_get_function_code); +enum hcall_sigt { + HCALL_SIGT_NO_CQE = 0, + HCALL_SIGT_BY_WQE = 1, + HCALL_SIGT_EVERY = 2 +}; + +u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u64 update_mask, + struct hcp_modify_qp_control_block *mqpcb, + struct h_galpa gal); + +u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + struct hcp_modify_qp_control_block *qqpcb, + struct h_galpa gal); + +u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp); + +u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port); + +u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port, u32 * pma_qp_nr, + u32 * bma_qp_nr); + +u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id); + +u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id); + +u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + u8 force_flag); + +u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_eq *eq); + +/* + * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize + * resources. + */ +u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */ +u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count); + +/* hipz_h_query_mr queries MR in HW and FW */ +u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_free_resource_mr frees MR resources in HW and FW */ +u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr); + +/* hipz_h_reregister_pmr reregisters MR in HW and FW */ +u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr_in, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + const u64 mr_addr_cb, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_register_smr register shared MR in HW and FW */ +u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const struct ehca_mr *orig_mr, + const u64 vaddr_in, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms); + +/* + * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize + * resources. + */ +u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + const struct ipz_pd pd, + struct ehca_mw_hipzout_parms *outparms); + +/* hipz_h_query_mw queries MW in HW and FW */ +u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + struct ehca_mw_hipzout_parms *outparms); + +/* hipz_h_free_resource_mw frees MW resources in HW and FW */ +u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw); + +u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, + const u64 ressource_handle, + void *rblock, + unsigned long *byte_count); +u64 hipz_h_eoi(int irq); + +#endif /* __HCP_IF_H__ */ diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c new file mode 100644 index 00000000..077376ff --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_phyp.c @@ -0,0 +1,82 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * load store abstraction for ehca register access with tracing + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_classes.h" +#include "hipz_hw.h" + +u64 hcall_map_page(u64 physaddr) +{ + return (u64)ioremap(physaddr, EHCA_PAGESIZE); +} + +int hcall_unmap_page(u64 mapaddr) +{ + iounmap((volatile void __iomem *) mapaddr); + return 0; +} + +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, + u64 paddr_kernel, u64 paddr_user) +{ + if (!is_user) { + galpas->kernel.fw_handle = hcall_map_page(paddr_kernel); + if (!galpas->kernel.fw_handle) + return -ENOMEM; + } else + galpas->kernel.fw_handle = 0; + + galpas->user.fw_handle = paddr_user; + + return 0; +} + +int hcp_galpas_dtor(struct h_galpas *galpas) +{ + if (galpas->kernel.fw_handle) { + int ret = hcall_unmap_page(galpas->kernel.fw_handle); + if (ret) + return ret; + } + + galpas->user.fw_handle = galpas->kernel.fw_handle = 0; + + return 0; +} diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h new file mode 100644 index 00000000..d1b02991 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_phyp.h @@ -0,0 +1,90 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware calls + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Waleri Fomin + * Gerd Bayer + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HCP_PHYP_H__ +#define __HCP_PHYP_H__ + + +/* + * eHCA page (mapped into memory) + * resource to access eHCA register pages in CPU address space +*/ +struct h_galpa { + u64 fw_handle; + /* for pSeries this is a 64bit memory address where + I/O memory is mapped into CPU address space (kv) */ +}; + +/* + * resource to access eHCA address space registers, all types + */ +struct h_galpas { + u32 pid; /*PID of userspace galpa checking */ + struct h_galpa user; /* user space accessible resource, + set to 0 if unused */ + struct h_galpa kernel; /* kernel space accessible resource, + set to 0 if unused */ +}; + +static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset) +{ + u64 addr = galpa.fw_handle + offset; + return *(volatile u64 __force *)addr; +} + +static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) +{ + u64 addr = galpa.fw_handle + offset; + *(volatile u64 __force *)addr = value; +} + +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, + u64 paddr_kernel, u64 paddr_user); + +int hcp_galpas_dtor(struct h_galpas *galpas); + +u64 hcall_map_page(u64 physaddr); + +int hcall_unmap_page(u64 mapaddr); + +#endif diff --git a/drivers/infiniband/hw/ehca/hipz_fns.h b/drivers/infiniband/hw/ehca/hipz_fns.h new file mode 100644 index 00000000..9dac93d0 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hipz_fns.h @@ -0,0 +1,68 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HW abstraction register functions + * + * Authors: Christoph Raisch + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HIPZ_FNS_H__ +#define __HIPZ_FNS_H__ + +#include "ehca_classes.h" +#include "hipz_hw.h" + +#include "hipz_fns_core.h" + +#define hipz_galpa_store_eq(gal, offset, value) \ + hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value) + +#define hipz_galpa_load_eq(gal, offset) \ + hipz_galpa_load(gal, EQTEMM_OFFSET(offset)) + +#define hipz_galpa_store_qped(gal, offset, value) \ + hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value) + +#define hipz_galpa_load_qped(gal, offset) \ + hipz_galpa_load(gal, QPEDMM_OFFSET(offset)) + +#define hipz_galpa_store_mrmw(gal, offset, value) \ + hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value) + +#define hipz_galpa_load_mrmw(gal, offset) \ + hipz_galpa_load(gal, MRMWMM_OFFSET(offset)) + +#endif diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h new file mode 100644 index 00000000..868735fd --- /dev/null +++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h @@ -0,0 +1,100 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HW abstraction register functions + * + * Authors: Christoph Raisch + * Heiko J Schick + * Hoang-Nam Nguyen + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HIPZ_FNS_CORE_H__ +#define __HIPZ_FNS_CORE_H__ + +#include "hcp_phyp.h" +#include "hipz_hw.h" + +#define hipz_galpa_store_cq(gal, offset, value) \ + hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value) + +#define hipz_galpa_load_cq(gal, offset) \ + hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) + +#define hipz_galpa_store_qp(gal, offset, value) \ + hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) +#define hipz_galpa_load_qp(gal, offset) \ + hipz_galpa_load(gal, QPTEMM_OFFSET(offset)) + +static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) +{ + /* ringing doorbell :-) */ + hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa, + EHCA_BMASK_SET(QPX_SQADDER, nr_wqes)); +} + +static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes) +{ + /* ringing doorbell :-) */ + hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa, + EHCA_BMASK_SET(QPX_RQADDER, nr_wqes)); +} + +static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes) +{ + hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca, + EHCA_BMASK_SET(CQX_FECADDER, nr_cqes)); +} + +static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value) +{ + u64 cqx_n0_reg; + + hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0, + EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT, + value)); + cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0); +} + +static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value) +{ + u64 cqx_n1_reg; + + hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1, + EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value)); + cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1); +} + +#endif /* __HIPZ_FNC_CORE_H__ */ diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h new file mode 100644 index 00000000..bf996c7a --- /dev/null +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -0,0 +1,414 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * eHCA register definitions + * + * Authors: Waleri Fomin + * Christoph Raisch + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HIPZ_HW_H__ +#define __HIPZ_HW_H__ + +#include "ehca_tools.h" + +#define EHCA_MAX_MTU 4 + +/* QP Table Entry Memory Map */ +struct hipz_qptemm { + u64 qpx_hcr; + u64 qpx_c; + u64 qpx_herr; + u64 qpx_aer; +/* 0x20*/ + u64 qpx_sqa; + u64 qpx_sqc; + u64 qpx_rqa; + u64 qpx_rqc; +/* 0x40*/ + u64 qpx_st; + u64 qpx_pmstate; + u64 qpx_pmfa; + u64 qpx_pkey; +/* 0x60*/ + u64 qpx_pkeya; + u64 qpx_pkeyb; + u64 qpx_pkeyc; + u64 qpx_pkeyd; +/* 0x80*/ + u64 qpx_qkey; + u64 qpx_dqp; + u64 qpx_dlidp; + u64 qpx_portp; +/* 0xa0*/ + u64 qpx_slidp; + u64 qpx_slidpp; + u64 qpx_dlida; + u64 qpx_porta; +/* 0xc0*/ + u64 qpx_slida; + u64 qpx_slidpa; + u64 qpx_slvl; + u64 qpx_ipd; +/* 0xe0*/ + u64 qpx_mtu; + u64 qpx_lato; + u64 qpx_rlimit; + u64 qpx_rnrlimit; +/* 0x100*/ + u64 qpx_t; + u64 qpx_sqhp; + u64 qpx_sqptp; + u64 qpx_nspsn; +/* 0x120*/ + u64 qpx_nspsnhwm; + u64 reserved1; + u64 qpx_sdsi; + u64 qpx_sdsbc; +/* 0x140*/ + u64 qpx_sqwsize; + u64 qpx_sqwts; + u64 qpx_lsn; + u64 qpx_nssn; +/* 0x160 */ + u64 qpx_mor; + u64 qpx_cor; + u64 qpx_sqsize; + u64 qpx_erc; +/* 0x180*/ + u64 qpx_rnrrc; + u64 qpx_ernrwt; + u64 qpx_rnrresp; + u64 qpx_lmsna; +/* 0x1a0 */ + u64 qpx_sqhpc; + u64 qpx_sqcptp; + u64 qpx_sigt; + u64 qpx_wqecnt; +/* 0x1c0*/ + u64 qpx_rqhp; + u64 qpx_rqptp; + u64 qpx_rqsize; + u64 qpx_nrr; +/* 0x1e0*/ + u64 qpx_rdmac; + u64 qpx_nrpsn; + u64 qpx_lapsn; + u64 qpx_lcr; +/* 0x200*/ + u64 qpx_rwc; + u64 qpx_rwva; + u64 qpx_rdsi; + u64 qpx_rdsbc; +/* 0x220*/ + u64 qpx_rqwsize; + u64 qpx_crmsn; + u64 qpx_rdd; + u64 qpx_larpsn; +/* 0x240*/ + u64 qpx_pd; + u64 qpx_scqn; + u64 qpx_rcqn; + u64 qpx_aeqn; +/* 0x260*/ + u64 qpx_aaelog; + u64 qpx_ram; + u64 qpx_rdmaqe0; + u64 qpx_rdmaqe1; +/* 0x280*/ + u64 qpx_rdmaqe2; + u64 qpx_rdmaqe3; + u64 qpx_nrpsnhwm; +/* 0x298*/ + u64 reserved[(0x400 - 0x298) / 8]; +/* 0x400 extended data */ + u64 reserved_ext[(0x500 - 0x400) / 8]; +/* 0x500 */ + u64 reserved2[(0x1000 - 0x500) / 8]; +/* 0x1000 */ +}; + +#define QPX_SQADDER EHCA_BMASK_IBM(48, 63) +#define QPX_RQADDER EHCA_BMASK_IBM(48, 63) +#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3) + +#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x) + +/* MRMWPT Entry Memory Map */ +struct hipz_mrmwmm { + /* 0x00 */ + u64 mrx_hcr; + + u64 mrx_c; + u64 mrx_herr; + u64 mrx_aer; + /* 0x20 */ + u64 mrx_pp; + u64 reserved1; + u64 reserved2; + u64 reserved3; + /* 0x40 */ + u64 reserved4[(0x200 - 0x40) / 8]; + /* 0x200 */ + u64 mrx_ctl[64]; + +}; + +#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x) + +struct hipz_qpedmm { + /* 0x00 */ + u64 reserved0[(0x400) / 8]; + /* 0x400 */ + u64 qpedx_phh; + u64 qpedx_ppsgp; + /* 0x410 */ + u64 qpedx_ppsgu; + u64 qpedx_ppdgp; + /* 0x420 */ + u64 qpedx_ppdgu; + u64 qpedx_aph; + /* 0x430 */ + u64 qpedx_apsgp; + u64 qpedx_apsgu; + /* 0x440 */ + u64 qpedx_apdgp; + u64 qpedx_apdgu; + /* 0x450 */ + u64 qpedx_apav; + u64 qpedx_apsav; + /* 0x460 */ + u64 qpedx_hcr; + u64 reserved1[4]; + /* 0x488 */ + u64 qpedx_rrl0; + /* 0x490 */ + u64 qpedx_rrrkey0; + u64 qpedx_rrva0; + /* 0x4a0 */ + u64 reserved2; + u64 qpedx_rrl1; + /* 0x4b0 */ + u64 qpedx_rrrkey1; + u64 qpedx_rrva1; + /* 0x4c0 */ + u64 reserved3; + u64 qpedx_rrl2; + /* 0x4d0 */ + u64 qpedx_rrrkey2; + u64 qpedx_rrva2; + /* 0x4e0 */ + u64 reserved4; + u64 qpedx_rrl3; + /* 0x4f0 */ + u64 qpedx_rrrkey3; + u64 qpedx_rrva3; +}; + +#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x) + +/* CQ Table Entry Memory Map */ +struct hipz_cqtemm { + u64 cqx_hcr; + u64 cqx_c; + u64 cqx_herr; + u64 cqx_aer; +/* 0x20 */ + u64 cqx_ptp; + u64 cqx_tp; + u64 cqx_fec; + u64 cqx_feca; +/* 0x40 */ + u64 cqx_ep; + u64 cqx_eq; +/* 0x50 */ + u64 reserved1; + u64 cqx_n0; +/* 0x60 */ + u64 cqx_n1; + u64 reserved2[(0x1000 - 0x60) / 8]; +/* 0x1000 */ +}; + +#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63) +#define CQX_FECADDER EHCA_BMASK_IBM(32, 63) +#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0) +#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0) + +#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x) + +/* EQ Table Entry Memory Map */ +struct hipz_eqtemm { + u64 eqx_hcr; + u64 eqx_c; + + u64 eqx_herr; + u64 eqx_aer; +/* 0x20 */ + u64 eqx_ptp; + u64 eqx_tp; + u64 eqx_ssba; + u64 eqx_psba; + +/* 0x40 */ + u64 eqx_cec; + u64 eqx_meql; + u64 eqx_xisbi; + u64 eqx_xisc; +/* 0x60 */ + u64 eqx_it; + +}; + +#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x) + +/* access control defines for MR/MW */ +#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000 +#define HIPZ_ACCESSCTRL_R_WRITE 0x00400000 +#define HIPZ_ACCESSCTRL_R_READ 0x00200000 +#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000 +#define HIPZ_ACCESSCTRL_MW_BIND 0x00080000 + +/* query hca response block */ +struct hipz_query_hca { + u32 cur_reliable_dg; + u32 cur_qp; + u32 cur_cq; + u32 cur_eq; + u32 cur_mr; + u32 cur_mw; + u32 cur_ee_context; + u32 cur_mcast_grp; + u32 cur_qp_attached_mcast_grp; + u32 reserved1; + u32 cur_ipv6_qp; + u32 cur_eth_qp; + u32 cur_hp_mr; + u32 reserved2[3]; + u32 max_rd_domain; + u32 max_qp; + u32 max_cq; + u32 max_eq; + u32 max_mr; + u32 max_hp_mr; + u32 max_mw; + u32 max_mrwpte; + u32 max_special_mrwpte; + u32 max_rd_ee_context; + u32 max_mcast_grp; + u32 max_total_mcast_qp_attach; + u32 max_mcast_qp_attach; + u32 max_raw_ipv6_qp; + u32 max_raw_ethy_qp; + u32 internal_clock_frequency; + u32 max_pd; + u32 max_ah; + u32 max_cqe; + u32 max_wqes_wq; + u32 max_partitions; + u32 max_rr_ee_context; + u32 max_rr_qp; + u32 max_rr_hca; + u32 max_act_wqs_ee_context; + u32 max_act_wqs_qp; + u32 max_sge; + u32 max_sge_rd; + u32 memory_page_size_supported; + u64 max_mr_size; + u32 local_ca_ack_delay; + u32 num_ports; + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + u64 node_guid; + u64 hca_cap_indicators; + u32 data_counter_register_size; + u32 max_shared_rq; + u32 max_isns_eq; + u32 max_neq; +} __attribute__ ((packed)); + +#define HCA_CAP_AH_PORT_NR_CHECK EHCA_BMASK_IBM( 0, 0) +#define HCA_CAP_ATOMIC EHCA_BMASK_IBM( 1, 1) +#define HCA_CAP_AUTO_PATH_MIG EHCA_BMASK_IBM( 2, 2) +#define HCA_CAP_BAD_P_KEY_CTR EHCA_BMASK_IBM( 3, 3) +#define HCA_CAP_SQD_RTS_PORT_CHANGE EHCA_BMASK_IBM( 4, 4) +#define HCA_CAP_CUR_QP_STATE_MOD EHCA_BMASK_IBM( 5, 5) +#define HCA_CAP_INIT_TYPE EHCA_BMASK_IBM( 6, 6) +#define HCA_CAP_PORT_ACTIVE_EVENT EHCA_BMASK_IBM( 7, 7) +#define HCA_CAP_Q_KEY_VIOL_CTR EHCA_BMASK_IBM( 8, 8) +#define HCA_CAP_WQE_RESIZE EHCA_BMASK_IBM( 9, 9) +#define HCA_CAP_RAW_PACKET_MCAST EHCA_BMASK_IBM(10, 10) +#define HCA_CAP_SHUTDOWN_PORT EHCA_BMASK_IBM(11, 11) +#define HCA_CAP_RC_LL_QP EHCA_BMASK_IBM(12, 12) +#define HCA_CAP_SRQ EHCA_BMASK_IBM(13, 13) +#define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16) +#define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17) +#define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18) +#define HCA_CAP_H_ALLOC_RES_SYNC EHCA_BMASK_IBM(19, 19) + +/* query port response block */ +struct hipz_query_port { + u32 state; + u32 bad_pkey_cntr; + u32 lmc; + u32 lid; + u32 subnet_timeout; + u32 qkey_viol_cntr; + u32 sm_sl; + u32 sm_lid; + u32 capability_mask; + u32 init_type_reply; + u32 pkey_tbl_len; + u32 gid_tbl_len; + u64 gid_prefix; + u32 port_nr; + u16 pkey_entries[16]; + u8 reserved1[32]; + u32 trent_size; + u32 trbuf_size; + u64 max_msg_sz; + u32 max_mtu; + u32 vl_cap; + u32 phys_pstate; + u32 phys_state; + u32 phys_speed; + u32 phys_width; + u8 reserved2[1884]; + u64 guid_entries[255]; +} __attribute__ ((packed)); + +#endif diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c new file mode 100644 index 00000000..1898d6e7 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -0,0 +1,294 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * internal queue handling + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_tools.h" +#include "ipz_pt_fn.h" +#include "ehca_classes.h" + +#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT) + +struct kmem_cache *small_qp_cache; + +void *ipz_qpageit_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + queue->current_q_offset += queue->pagesize; + if (queue->current_q_offset > queue->queue_length) { + queue->current_q_offset -= queue->pagesize; + ret = NULL; + } + if (((u64)ret) % queue->pagesize) { + ehca_gen_err("ERROR!! not at PAGE-Boundary"); + return NULL; + } + return ret; +} + +void *ipz_qeit_eq_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u64 last_entry_in_q = queue->queue_length - queue->qe_size; + + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset > last_entry_in_q) { + queue->current_q_offset = 0; + queue->toggle_state = (~queue->toggle_state) & 1; + } + + return ret; +} + +int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset) +{ + int i; + for (i = 0; i < queue->queue_length / queue->pagesize; i++) { + u64 page = (u64)virt_to_abs(queue->queue_pages[i]); + if (addr >= page && addr < page + queue->pagesize) { + *q_offset = addr - page + i * queue->pagesize; + return 0; + } + } + return -EINVAL; +} + +#if PAGE_SHIFT < EHCA_PAGESHIFT +#error Kernel pages must be at least as large than eHCA pages (4K) ! +#endif + +/* + * allocate pages for queue: + * outer loop allocates whole kernel pages (page aligned) and + * inner loop divides a kernel page into smaller hca queue pages + */ +static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages) +{ + int k, f = 0; + u8 *kpage; + + while (f < nr_of_pages) { + kpage = (u8 *)get_zeroed_page(GFP_KERNEL); + if (!kpage) + goto out; + + for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) { + queue->queue_pages[f] = (struct ipz_page *)kpage; + kpage += EHCA_PAGESIZE; + f++; + } + } + return 1; + +out: + for (f = 0; f < nr_of_pages && queue->queue_pages[f]; + f += PAGES_PER_KPAGE) + free_page((unsigned long)(queue->queue_pages)[f]); + return 0; +} + +static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) +{ + int order = ilog2(queue->pagesize) - 9; + struct ipz_small_queue_page *page; + unsigned long bit; + + mutex_lock(&pd->lock); + + if (!list_empty(&pd->free[order])) + page = list_entry(pd->free[order].next, + struct ipz_small_queue_page, list); + else { + page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL); + if (!page) + goto out; + + page->page = get_zeroed_page(GFP_KERNEL); + if (!page->page) { + kmem_cache_free(small_qp_cache, page); + goto out; + } + + list_add(&page->list, &pd->free[order]); + } + + bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order); + __set_bit(bit, page->bitmap); + page->fill++; + + if (page->fill == IPZ_SPAGE_PER_KPAGE >> order) + list_move(&page->list, &pd->full[order]); + + mutex_unlock(&pd->lock); + + queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9))); + queue->small_page = page; + queue->offset = bit << (order + 9); + return 1; + +out: + ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); + mutex_unlock(&pd->lock); + return 0; +} + +static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) +{ + int order = ilog2(queue->pagesize) - 9; + struct ipz_small_queue_page *page = queue->small_page; + unsigned long bit; + int free_page = 0; + + bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK) + >> (order + 9); + + mutex_lock(&pd->lock); + + __clear_bit(bit, page->bitmap); + page->fill--; + + if (page->fill == 0) { + list_del(&page->list); + free_page = 1; + } + + if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1) + /* the page was full until we freed the chunk */ + list_move_tail(&page->list, &pd->free[order]); + + mutex_unlock(&pd->lock); + + if (free_page) { + free_page(page->page); + kmem_cache_free(small_qp_cache, page); + } +} + +int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, + const u32 nr_of_pages, const u32 pagesize, + const u32 qe_size, const u32 nr_of_sg, + int is_small) +{ + if (pagesize > PAGE_SIZE) { + ehca_gen_err("FATAL ERROR: pagesize=%x " + "is greater than kernel page size", pagesize); + return 0; + } + + /* init queue fields */ + queue->queue_length = nr_of_pages * pagesize; + queue->pagesize = pagesize; + queue->qe_size = qe_size; + queue->act_nr_of_sg = nr_of_sg; + queue->current_q_offset = 0; + queue->toggle_state = 1; + queue->small_page = NULL; + + /* allocate queue page pointers */ + queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *), GFP_KERNEL); + if (!queue->queue_pages) { + queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *)); + if (!queue->queue_pages) { + ehca_gen_err("Couldn't allocate queue page list"); + return 0; + } + } + + /* allocate actual queue pages */ + if (is_small) { + if (!alloc_small_queue_page(queue, pd)) + goto ipz_queue_ctor_exit0; + } else + if (!alloc_queue_pages(queue, nr_of_pages)) + goto ipz_queue_ctor_exit0; + + return 1; + +ipz_queue_ctor_exit0: + ehca_gen_err("Couldn't alloc pages queue=%p " + "nr_of_pages=%x", queue, nr_of_pages); + if (is_vmalloc_addr(queue->queue_pages)) + vfree(queue->queue_pages); + else + kfree(queue->queue_pages); + + return 0; +} + +int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) +{ + int i, nr_pages; + + if (!queue || !queue->queue_pages) { + ehca_gen_dbg("queue or queue_pages is NULL"); + return 0; + } + + if (queue->small_page) + free_small_queue_page(queue, pd); + else { + nr_pages = queue->queue_length / queue->pagesize; + for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE) + free_page((unsigned long)queue->queue_pages[i]); + } + + if (is_vmalloc_addr(queue->queue_pages)) + vfree(queue->queue_pages); + else + kfree(queue->queue_pages); + + return 1; +} + +int ehca_init_small_qp_cache(void) +{ + small_qp_cache = kmem_cache_create("ehca_cache_small_qp", + sizeof(struct ipz_small_queue_page), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!small_qp_cache) + return -ENOMEM; + + return 0; +} + +void ehca_cleanup_small_qp_cache(void) +{ + kmem_cache_destroy(small_qp_cache); +} diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h new file mode 100644 index 00000000..a801274e --- /dev/null +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -0,0 +1,289 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * internal queue handling + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __IPZ_PT_FN_H__ +#define __IPZ_PT_FN_H__ + +#define EHCA_PAGESHIFT 12 +#define EHCA_PAGESIZE 4096UL +#define EHCA_PAGEMASK (~(EHCA_PAGESIZE-1)) +#define EHCA_PT_ENTRIES 512UL + +#include "ehca_tools.h" +#include "ehca_qes.h" + +struct ehca_pd; +struct ipz_small_queue_page; + +extern struct kmem_cache *small_qp_cache; + +/* struct generic ehca page */ +struct ipz_page { + u8 entries[EHCA_PAGESIZE]; +}; + +#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512) + +struct ipz_small_queue_page { + unsigned long page; + unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG]; + int fill; + void *mapped_addr; + u32 mmap_count; + struct list_head list; +}; + +/* struct generic queue in linux kernel virtual memory (kv) */ +struct ipz_queue { + u64 current_q_offset; /* current queue entry */ + + struct ipz_page **queue_pages; /* array of pages belonging to queue */ + u32 qe_size; /* queue entry size */ + u32 act_nr_of_sg; + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; /* toggle flag - per page */ + u32 offset; /* save offset within page for small_qp */ + struct ipz_small_queue_page *small_page; +}; + +/* + * return current Queue Entry for a certain q_offset + * returns address (kv) of Queue Entry + */ +static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset) +{ + struct ipz_page *current_page; + if (q_offset >= queue->queue_length) + return NULL; + current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT]; + return ¤t_page->entries[q_offset & (EHCA_PAGESIZE - 1)]; +} + +/* + * return current Queue Entry + * returns address (kv) of Queue Entry + */ +static inline void *ipz_qeit_get(struct ipz_queue *queue) +{ + return ipz_qeit_calc(queue, queue->current_q_offset); +} + +/* + * return current Queue Page , increment Queue Page iterator from + * page to page in struct ipz_queue, last increment will return 0! and + * NOT wrap + * returns address (kv) of Queue Page + * warning don't use in parallel with ipz_QE_get_inc() + */ +void *ipz_qpageit_get_inc(struct ipz_queue *queue); + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * warning don't use in parallel with ipz_qpageit_get_inc() + */ +static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset >= queue->queue_length) { + queue->current_q_offset = 0; + /* toggle the valid flag */ + queue->toggle_state = (~queue->toggle_state) & 1; + } + + return ret; +} + +/* + * return a bool indicating whether current Queue Entry is valid + */ +static inline int ipz_qeit_is_valid(struct ipz_queue *queue) +{ + struct ehca_cqe *cqe = ipz_qeit_get(queue); + return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1)); +} + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * returns 0 and does not increment, if wrong valid state + * warning don't use in parallel with ipz_qpageit_get_inc() + */ +static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) +{ + return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL; +} + +/* + * returns and resets Queue Entry iterator + * returns address (kv) of first Queue Entry + */ +static inline void *ipz_qeit_reset(struct ipz_queue *queue) +{ + queue->current_q_offset = 0; + return ipz_qeit_get(queue); +} + +/* + * return the q_offset corresponding to an absolute address + */ +int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset); + +/* + * return the next queue offset. don't modify the queue. + */ +static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset) +{ + offset += queue->qe_size; + if (offset >= queue->queue_length) offset = 0; + return offset; +} + +/* struct generic page table */ +struct ipz_pt { + u64 entries[EHCA_PT_ENTRIES]; +}; + +/* struct page table for a queue, only to be used in pf */ +struct ipz_qpt { + /* queue page tables (kv), use u64 because we know the element length */ + u64 *qpts; + u32 n_qpts; + u32 n_ptes; /* number of page table entries */ + u64 *current_pte_addr; +}; + +/* + * constructor for a ipz_queue_t, placement new for ipz_queue_t, + * new for all dependent datastructors + * all QP Tables are the same + * flow: + * allocate+pin queue + * see ipz_qpt_ctor() + * returns true if ok, false if out of memory + */ +int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, + const u32 nr_of_pages, const u32 pagesize, + const u32 qe_size, const u32 nr_of_sg, + int is_small); + +/* + * destructor for a ipz_queue_t + * -# free queue + * see ipz_queue_ctor() + * returns true if ok, false if queue was NULL-ptr of free failed + */ +int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue); + +/* + * constructor for a ipz_qpt_t, + * placement new for struct ipz_queue, new for all dependent datastructors + * all QP Tables are the same, + * flow: + * -# allocate+pin queue + * -# initialise ptcb + * -# allocate+pin PTs + * -# link PTs to a ring, according to HCA Arch, set bit62 id needed + * -# the ring must have room for exactly nr_of_PTEs + * see ipz_qpt_ctor() + */ +void ipz_qpt_ctor(struct ipz_qpt *qpt, + const u32 nr_of_qes, + const u32 pagesize, + const u32 qe_size, + const u8 lowbyte, const u8 toggle, + u32 * act_nr_of_QEs, u32 * act_nr_of_pages); + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * warning don't use in parallel with ipz_qpageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + * fix EQ page problems + */ +void *ipz_qeit_eq_get_inc(struct ipz_queue *queue); + +/* + * return current Event Queue Entry, increment Queue Entry iterator + * by one step in struct ipz_queue if valid, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * returns 0 and does not increment, if wrong valid state + * warning don't use in parallel with ipz_queue_QPageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + */ +static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u32 qe = *(u8 *)ret; + if ((qe >> 7) != (queue->toggle_state & 1)) + return NULL; + ipz_qeit_eq_get_inc(queue); /* this is a good one */ + return ret; +} + +static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u32 qe = *(u8 *)ret; + if ((qe >> 7) != (queue->toggle_state & 1)) + return NULL; + return ret; +} + +/* returns address (GX) of first queue entry */ +static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt) +{ + return be64_to_cpu(qpt->qpts[0]); +} + +/* returns address (kv) of first page of queue page table */ +static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt) +{ + return qpt->qpts; +} + +#endif /* __IPZ_PT_FN_H__ */ diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig new file mode 100644 index 00000000..1d9bb115 --- /dev/null +++ b/drivers/infiniband/hw/ipath/Kconfig @@ -0,0 +1,11 @@ +config INFINIBAND_IPATH + tristate "QLogic HTX HCA support" + depends on 64BIT && NET && HT_IRQ + ---help--- + This is a driver for the obsolete QLogic Hyper-Transport + IB host channel adapter (model QHT7140), + including InfiniBand verbs support. This driver allows these + devices to be used with both kernel upper level protocols such + as IP-over-InfiniBand as well as with userspace applications + (in conjunction with InfiniBand userspace access). + For QLogic PCIe QLE based cards, use the QIB driver instead. diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile new file mode 100644 index 00000000..4496f282 --- /dev/null +++ b/drivers/infiniband/hw/ipath/Makefile @@ -0,0 +1,37 @@ +ccflags-y := -DIPATH_IDSTR='"QLogic kernel.org driver"' \ + -DIPATH_KERN_TYPE=0 + +obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o + +ib_ipath-y := \ + ipath_cq.o \ + ipath_diag.o \ + ipath_dma.o \ + ipath_driver.o \ + ipath_eeprom.o \ + ipath_file_ops.o \ + ipath_fs.o \ + ipath_init_chip.o \ + ipath_intr.o \ + ipath_keys.o \ + ipath_mad.o \ + ipath_mmap.o \ + ipath_mr.o \ + ipath_qp.o \ + ipath_rc.o \ + ipath_ruc.o \ + ipath_sdma.o \ + ipath_srq.o \ + ipath_stats.o \ + ipath_sysfs.o \ + ipath_uc.o \ + ipath_ud.o \ + ipath_user_pages.o \ + ipath_user_sdma.o \ + ipath_verbs_mcast.o \ + ipath_verbs.o + +ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o + +ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o +ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h new file mode 100644 index 00000000..28cfe97c --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -0,0 +1,851 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _IPATH_COMMON_H +#define _IPATH_COMMON_H + +/* + * This file contains defines, structures, etc. that are used + * to communicate between kernel and user code. + */ + + +/* This is the IEEE-assigned OUI for QLogic Inc. InfiniPath */ +#define IPATH_SRC_OUI_1 0x00 +#define IPATH_SRC_OUI_2 0x11 +#define IPATH_SRC_OUI_3 0x75 + +/* version of protocol header (known to chip also). In the long run, + * we should be able to generate and accept a range of version numbers; + * for now we only accept one, and it's compiled in. + */ +#define IPS_PROTO_VERSION 2 + +/* + * These are compile time constants that you may want to enable or disable + * if you are trying to debug problems with code or performance. + * IPATH_VERBOSE_TRACING define as 1 if you want additional tracing in + * fastpath code + * IPATH_TRACE_REGWRITES define as 1 if you want register writes to be + * traced in faspath code + * _IPATH_TRACING define as 0 if you want to remove all tracing in a + * compilation unit + * _IPATH_DEBUGGING define as 0 if you want to remove debug prints + */ + +/* + * The value in the BTH QP field that InfiniPath uses to differentiate + * an infinipath protocol IB packet vs standard IB transport + */ +#define IPATH_KD_QP 0x656b79 + +/* + * valid states passed to ipath_set_linkstate() user call + */ +#define IPATH_IB_LINKDOWN 0 +#define IPATH_IB_LINKARM 1 +#define IPATH_IB_LINKACTIVE 2 +#define IPATH_IB_LINKDOWN_ONLY 3 +#define IPATH_IB_LINKDOWN_SLEEP 4 +#define IPATH_IB_LINKDOWN_DISABLE 5 +#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */ +#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */ +#define IPATH_IB_LINK_NO_HRTBT 8 /* disable Heartbeat, e.g. for loopback */ +#define IPATH_IB_LINK_HRTBT 9 /* enable heartbeat, normal, non-loopback */ + +/* + * These 3 values (SDR and DDR may be ORed for auto-speed + * negotiation) are used for the 3rd argument to path_f_set_ib_cfg + * with cmd IPATH_IB_CFG_SPD_ENB, by direct calls or via sysfs. They + * are also the the possible values for ipath_link_speed_enabled and active + * The values were chosen to match values used within the IB spec. + */ +#define IPATH_IB_SDR 1 +#define IPATH_IB_DDR 2 + +/* + * stats maintained by the driver. For now, at least, this is global + * to all minor devices. + */ +struct infinipath_stats { + /* number of interrupts taken */ + __u64 sps_ints; + /* number of interrupts for errors */ + __u64 sps_errints; + /* number of errors from chip (not incl. packet errors or CRC) */ + __u64 sps_errs; + /* number of packet errors from chip other than CRC */ + __u64 sps_pkterrs; + /* number of packets with CRC errors (ICRC and VCRC) */ + __u64 sps_crcerrs; + /* number of hardware errors reported (parity, etc.) */ + __u64 sps_hwerrs; + /* number of times IB link changed state unexpectedly */ + __u64 sps_iblink; + __u64 sps_unused; /* was fastrcvint, no longer implemented */ + /* number of kernel (port0) packets received */ + __u64 sps_port0pkts; + /* number of "ethernet" packets sent by driver */ + __u64 sps_ether_spkts; + /* number of "ethernet" packets received by driver */ + __u64 sps_ether_rpkts; + /* number of SMA packets sent by driver. Obsolete. */ + __u64 sps_sma_spkts; + /* number of SMA packets received by driver. Obsolete. */ + __u64 sps_sma_rpkts; + /* number of times all ports rcvhdrq was full and packet dropped */ + __u64 sps_hdrqfull; + /* number of times all ports egrtid was full and packet dropped */ + __u64 sps_etidfull; + /* + * number of times we tried to send from driver, but no pio buffers + * avail + */ + __u64 sps_nopiobufs; + /* number of ports currently open */ + __u64 sps_ports; + /* list of pkeys (other than default) accepted (0 means not set) */ + __u16 sps_pkeys[4]; + __u16 sps_unused16[4]; /* available; maintaining compatible layout */ + /* number of user ports per chip (not IB ports) */ + __u32 sps_nports; + /* not our interrupt, or already handled */ + __u32 sps_nullintr; + /* max number of packets handled per receive call */ + __u32 sps_maxpkts_call; + /* avg number of packets handled per receive call */ + __u32 sps_avgpkts_call; + /* total number of pages locked */ + __u64 sps_pagelocks; + /* total number of pages unlocked */ + __u64 sps_pageunlocks; + /* + * Number of packets dropped in kernel other than errors (ether + * packets if ipath not configured, etc.) + */ + __u64 sps_krdrops; + __u64 sps_txeparity; /* PIO buffer parity error, recovered */ + /* pad for future growth */ + __u64 __sps_pad[45]; +}; + +/* + * These are the status bits readable (in ascii form, 64bit value) + * from the "status" sysfs file. + */ +#define IPATH_STATUS_INITTED 0x1 /* basic initialization done */ +#define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */ +/* Device has been disabled via admin request */ +#define IPATH_STATUS_ADMIN_DISABLED 0x4 +/* Chip has been found and initted */ +#define IPATH_STATUS_CHIP_PRESENT 0x20 +/* IB link is at ACTIVE, usable for data traffic */ +#define IPATH_STATUS_IB_READY 0x40 +/* link is configured, LID, MTU, etc. have been set */ +#define IPATH_STATUS_IB_CONF 0x80 +/* no link established, probably no cable */ +#define IPATH_STATUS_IB_NOCABLE 0x100 +/* A Fatal hardware error has occurred. */ +#define IPATH_STATUS_HWERROR 0x200 + +/* + * The list of usermode accessible registers. Also see Reg_* later in file. + */ +typedef enum _ipath_ureg { + /* (RO) DMA RcvHdr to be used next. */ + ur_rcvhdrtail = 0, + /* (RW) RcvHdr entry to be processed next by host. */ + ur_rcvhdrhead = 1, + /* (RO) Index of next Eager index to use. */ + ur_rcvegrindextail = 2, + /* (RW) Eager TID to be processed next */ + ur_rcvegrindexhead = 3, + /* For internal use only; max register number. */ + _IPATH_UregMax +} ipath_ureg; + +/* bit values for spi_runtime_flags */ +#define IPATH_RUNTIME_HT 0x1 +#define IPATH_RUNTIME_PCIE 0x2 +#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4 +#define IPATH_RUNTIME_RCVHDR_COPY 0x8 +#define IPATH_RUNTIME_MASTER 0x10 +#define IPATH_RUNTIME_NODMA_RTAIL 0x80 +#define IPATH_RUNTIME_SDMA 0x200 +#define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400 +#define IPATH_RUNTIME_PIO_REGSWAPPED 0x800 + +/* + * This structure is returned by ipath_userinit() immediately after + * open to get implementation-specific info, and info specific to this + * instance. + * + * This struct must have explict pad fields where type sizes + * may result in different alignments between 32 and 64 bit + * programs, since the 64 bit * bit kernel requires the user code + * to have matching offsets + */ +struct ipath_base_info { + /* version of hardware, for feature checking. */ + __u32 spi_hw_version; + /* version of software, for feature checking. */ + __u32 spi_sw_version; + /* InfiniPath port assigned, goes into sent packets */ + __u16 spi_port; + __u16 spi_subport; + /* + * IB MTU, packets IB data must be less than this. + * The MTU is in bytes, and will be a multiple of 4 bytes. + */ + __u32 spi_mtu; + /* + * Size of a PIO buffer. Any given packet's total size must be less + * than this (in words). Included is the starting control word, so + * if 513 is returned, then total pkt size is 512 words or less. + */ + __u32 spi_piosize; + /* size of the TID cache in infinipath, in entries */ + __u32 spi_tidcnt; + /* size of the TID Eager list in infinipath, in entries */ + __u32 spi_tidegrcnt; + /* size of a single receive header queue entry in words. */ + __u32 spi_rcvhdrent_size; + /* + * Count of receive header queue entries allocated. + * This may be less than the spu_rcvhdrcnt passed in!. + */ + __u32 spi_rcvhdr_cnt; + + /* per-chip and other runtime features bitmap (IPATH_RUNTIME_*) */ + __u32 spi_runtime_flags; + + /* address where receive buffer queue is mapped into */ + __u64 spi_rcvhdr_base; + + /* user program. */ + + /* base address of eager TID receive buffers. */ + __u64 spi_rcv_egrbufs; + + /* Allocated by initialization code, not by protocol. */ + + /* + * Size of each TID buffer in host memory, starting at + * spi_rcv_egrbufs. The buffers are virtually contiguous. + */ + __u32 spi_rcv_egrbufsize; + /* + * The special QP (queue pair) value that identifies an infinipath + * protocol packet from standard IB packets. More, probably much + * more, to be added. + */ + __u32 spi_qpair; + + /* + * User register base for init code, not to be used directly by + * protocol or applications. + */ + __u64 __spi_uregbase; + /* + * Maximum buffer size in bytes that can be used in a single TID + * entry (assuming the buffer is aligned to this boundary). This is + * the minimum of what the hardware and software support Guaranteed + * to be a power of 2. + */ + __u32 spi_tid_maxsize; + /* + * alignment of each pio send buffer (byte count + * to add to spi_piobufbase to get to second buffer) + */ + __u32 spi_pioalign; + /* + * The index of the first pio buffer available to this process; + * needed to do lookup in spi_pioavailaddr; not added to + * spi_piobufbase. + */ + __u32 spi_pioindex; + /* number of buffers mapped for this process */ + __u32 spi_piocnt; + + /* + * Base address of writeonly pio buffers for this process. + * Each buffer has spi_piosize words, and is aligned on spi_pioalign + * boundaries. spi_piocnt buffers are mapped from this address + */ + __u64 spi_piobufbase; + + /* + * Base address of readonly memory copy of the pioavail registers. + * There are 2 bits for each buffer. + */ + __u64 spi_pioavailaddr; + + /* + * Address where driver updates a copy of the interface and driver + * status (IPATH_STATUS_*) as a 64 bit value. It's followed by a + * string indicating hardware error, if there was one. + */ + __u64 spi_status; + + /* number of chip ports available to user processes */ + __u32 spi_nports; + /* unit number of chip we are using */ + __u32 spi_unit; + /* num bufs in each contiguous set */ + __u32 spi_rcv_egrperchunk; + /* size in bytes of each contiguous set */ + __u32 spi_rcv_egrchunksize; + /* total size of mmap to cover full rcvegrbuffers */ + __u32 spi_rcv_egrbuftotlen; + __u32 spi_filler_for_align; + /* address of readonly memory copy of the rcvhdrq tail register. */ + __u64 spi_rcvhdr_tailaddr; + + /* shared memory pages for subports if port is shared */ + __u64 spi_subport_uregbase; + __u64 spi_subport_rcvegrbuf; + __u64 spi_subport_rcvhdr_base; + + /* shared memory page for hardware port if it is shared */ + __u64 spi_port_uregbase; + __u64 spi_port_rcvegrbuf; + __u64 spi_port_rcvhdr_base; + __u64 spi_port_rcvhdr_tailaddr; + +} __attribute__ ((aligned(8))); + + +/* + * This version number is given to the driver by the user code during + * initialization in the spu_userversion field of ipath_user_info, so + * the driver can check for compatibility with user code. + * + * The major version changes when data structures + * change in an incompatible way. The driver must be the same or higher + * for initialization to succeed. In some cases, a higher version + * driver will not interoperate with older software, and initialization + * will return an error. + */ +#define IPATH_USER_SWMAJOR 1 + +/* + * Minor version differences are always compatible + * a within a major version, however if user software is larger + * than driver software, some new features and/or structure fields + * may not be implemented; the user code must deal with this if it + * cares, or it must abort after initialization reports the difference. + */ +#define IPATH_USER_SWMINOR 6 + +#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR) + +#define IPATH_KERN_TYPE 0 + +/* + * Similarly, this is the kernel version going back to the user. It's + * slightly different, in that we want to tell if the driver was built as + * part of a QLogic release, or from the driver from openfabrics.org, + * kernel.org, or a standard distribution, for support reasons. + * The high bit is 0 for non-QLogic and 1 for QLogic-built/supplied. + * + * It's returned by the driver to the user code during initialization in the + * spi_sw_version field of ipath_base_info, so the user code can in turn + * check for compatibility with the kernel. +*/ +#define IPATH_KERN_SWVERSION ((IPATH_KERN_TYPE<<31) | IPATH_USER_SWVERSION) + +/* + * This structure is passed to ipath_userinit() to tell the driver where + * user code buffers are, sizes, etc. The offsets and sizes of the + * fields must remain unchanged, for binary compatibility. It can + * be extended, if userversion is changed so user code can tell, if needed + */ +struct ipath_user_info { + /* + * version of user software, to detect compatibility issues. + * Should be set to IPATH_USER_SWVERSION. + */ + __u32 spu_userversion; + + /* desired number of receive header queue entries */ + __u32 spu_rcvhdrcnt; + + /* size of struct base_info to write to */ + __u32 spu_base_info_size; + + /* + * number of words in KD protocol header + * This tells InfiniPath how many words to copy to rcvhdrq. If 0, + * kernel uses a default. Once set, attempts to set any other value + * are an error (EAGAIN) until driver is reloaded. + */ + __u32 spu_rcvhdrsize; + + /* + * If two or more processes wish to share a port, each process + * must set the spu_subport_cnt and spu_subport_id to the same + * values. The only restriction on the spu_subport_id is that + * it be unique for a given node. + */ + __u16 spu_subport_cnt; + __u16 spu_subport_id; + + __u32 spu_unused; /* kept for compatible layout */ + + /* + * address of struct base_info to write to + */ + __u64 spu_base_info; + +} __attribute__ ((aligned(8))); + +/* User commands. */ + +#define IPATH_CMD_MIN 16 + +#define __IPATH_CMD_USER_INIT 16 /* old set up userspace (for old user code) */ +#define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */ +#define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */ +#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */ +#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */ +#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */ +#define __IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes (for old user code) */ +#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */ +#define IPATH_CMD_USER_INIT 24 /* set up userspace */ +#define IPATH_CMD_UNUSED_1 25 +#define IPATH_CMD_UNUSED_2 26 +#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ +#define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */ +#define IPATH_CMD_ARMLAUNCH_CTRL 29 /* armlaunch detection control */ +/* 30 is unused */ +#define IPATH_CMD_SDMA_INFLIGHT 31 /* sdma inflight counter request */ +#define IPATH_CMD_SDMA_COMPLETE 32 /* sdma completion counter request */ + +/* + * Poll types + */ +#define IPATH_POLL_TYPE_URGENT 0x01 +#define IPATH_POLL_TYPE_OVERFLOW 0x02 + +struct ipath_port_info { + __u32 num_active; /* number of active units */ + __u32 unit; /* unit (chip) assigned to caller */ + __u16 port; /* port on unit assigned to caller */ + __u16 subport; /* subport on unit assigned to caller */ + __u16 num_ports; /* number of ports available on unit */ + __u16 num_subports; /* number of subports opened on port */ +}; + +struct ipath_tid_info { + __u32 tidcnt; + /* make structure same size in 32 and 64 bit */ + __u32 tid__unused; + /* virtual address of first page in transfer */ + __u64 tidvaddr; + /* pointer (same size 32/64 bit) to __u16 tid array */ + __u64 tidlist; + + /* + * pointer (same size 32/64 bit) to bitmap of TIDs used + * for this call; checked for being large enough at open + */ + __u64 tidmap; +}; + +struct ipath_cmd { + __u32 type; /* command type */ + union { + struct ipath_tid_info tid_info; + struct ipath_user_info user_info; + + /* + * address in userspace where we should put the sdma + * inflight counter + */ + __u64 sdma_inflight; + /* + * address in userspace where we should put the sdma + * completion counter + */ + __u64 sdma_complete; + /* address in userspace of struct ipath_port_info to + write result to */ + __u64 port_info; + /* enable/disable receipt of packets */ + __u32 recv_ctrl; + /* enable/disable armlaunch errors (non-zero to enable) */ + __u32 armlaunch_ctrl; + /* partition key to set */ + __u16 part_key; + /* user address of __u32 bitmask of active slaves */ + __u64 slave_mask_addr; + /* type of polling we want */ + __u16 poll_type; + } cmd; +}; + +struct ipath_iovec { + /* Pointer to data, but same size 32 and 64 bit */ + __u64 iov_base; + + /* + * Length of data; don't need 64 bits, but want + * ipath_sendpkt to remain same size as before 32 bit changes, so... + */ + __u64 iov_len; +}; + +/* + * Describes a single packet for send. Each packet can have one or more + * buffers, but the total length (exclusive of IB headers) must be less + * than the MTU, and if using the PIO method, entire packet length, + * including IB headers, must be less than the ipath_piosize value (words). + * Use of this necessitates including sys/uio.h + */ +struct __ipath_sendpkt { + __u32 sps_flags; /* flags for packet (TBD) */ + __u32 sps_cnt; /* number of entries to use in sps_iov */ + /* array of iov's describing packet. TEMPORARY */ + struct ipath_iovec sps_iov[4]; +}; + +/* + * diagnostics can send a packet by "writing" one of the following + * two structs to diag data special file + * The first is the legacy version for backward compatibility + */ +struct ipath_diag_pkt { + __u32 unit; + __u64 data; + __u32 len; +}; + +/* The second diag_pkt struct is the expanded version that allows + * more control over the packet, specifically, by allowing a custom + * pbc (+ static rate) qword, so that special modes and deliberate + * changes to CRCs can be used. The elements were also re-ordered + * for better alignment and to avoid padding issues. + */ +struct ipath_diag_xpkt { + __u64 data; + __u64 pbc_wd; + __u32 unit; + __u32 len; +}; + +/* + * Data layout in I2C flash (for GUID, etc.) + * All fields are little-endian binary unless otherwise stated + */ +#define IPATH_FLASH_VERSION 2 +struct ipath_flash { + /* flash layout version (IPATH_FLASH_VERSION) */ + __u8 if_fversion; + /* checksum protecting if_length bytes */ + __u8 if_csum; + /* + * valid length (in use, protected by if_csum), including + * if_fversion and if_csum themselves) + */ + __u8 if_length; + /* the GUID, in network order */ + __u8 if_guid[8]; + /* number of GUIDs to use, starting from if_guid */ + __u8 if_numguid; + /* the (last 10 characters of) board serial number, in ASCII */ + char if_serial[12]; + /* board mfg date (YYYYMMDD ASCII) */ + char if_mfgdate[8]; + /* last board rework/test date (YYYYMMDD ASCII) */ + char if_testdate[8]; + /* logging of error counts, TBD */ + __u8 if_errcntp[4]; + /* powered on hours, updated at driver unload */ + __u8 if_powerhour[2]; + /* ASCII free-form comment field */ + char if_comment[32]; + /* Backwards compatible prefix for longer QLogic Serial Numbers */ + char if_sprefix[4]; + /* 82 bytes used, min flash size is 128 bytes */ + __u8 if_future[46]; +}; + +/* + * These are the counters implemented in the chip, and are listed in order. + * The InterCaps naming is taken straight from the chip spec. + */ +struct infinipath_counters { + __u64 LBIntCnt; + __u64 LBFlowStallCnt; + __u64 TxSDmaDescCnt; /* was Reserved1 */ + __u64 TxUnsupVLErrCnt; + __u64 TxDataPktCnt; + __u64 TxFlowPktCnt; + __u64 TxDwordCnt; + __u64 TxLenErrCnt; + __u64 TxMaxMinLenErrCnt; + __u64 TxUnderrunCnt; + __u64 TxFlowStallCnt; + __u64 TxDroppedPktCnt; + __u64 RxDroppedPktCnt; + __u64 RxDataPktCnt; + __u64 RxFlowPktCnt; + __u64 RxDwordCnt; + __u64 RxLenErrCnt; + __u64 RxMaxMinLenErrCnt; + __u64 RxICRCErrCnt; + __u64 RxVCRCErrCnt; + __u64 RxFlowCtrlErrCnt; + __u64 RxBadFormatCnt; + __u64 RxLinkProblemCnt; + __u64 RxEBPCnt; + __u64 RxLPCRCErrCnt; + __u64 RxBufOvflCnt; + __u64 RxTIDFullErrCnt; + __u64 RxTIDValidErrCnt; + __u64 RxPKeyMismatchCnt; + __u64 RxP0HdrEgrOvflCnt; + __u64 RxP1HdrEgrOvflCnt; + __u64 RxP2HdrEgrOvflCnt; + __u64 RxP3HdrEgrOvflCnt; + __u64 RxP4HdrEgrOvflCnt; + __u64 RxP5HdrEgrOvflCnt; + __u64 RxP6HdrEgrOvflCnt; + __u64 RxP7HdrEgrOvflCnt; + __u64 RxP8HdrEgrOvflCnt; + __u64 RxP9HdrEgrOvflCnt; /* was Reserved6 */ + __u64 RxP10HdrEgrOvflCnt; /* was Reserved7 */ + __u64 RxP11HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 RxP12HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 RxP13HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 RxP14HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 RxP15HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 RxP16HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 IBStatusChangeCnt; + __u64 IBLinkErrRecoveryCnt; + __u64 IBLinkDownedCnt; + __u64 IBSymbolErrCnt; + /* The following are new for IBA7220 */ + __u64 RxVL15DroppedPktCnt; + __u64 RxOtherLocalPhyErrCnt; + __u64 PcieRetryBufDiagQwordCnt; + __u64 ExcessBufferOvflCnt; + __u64 LocalLinkIntegrityErrCnt; + __u64 RxVlErrCnt; + __u64 RxDlidFltrCnt; +}; + +/* + * The next set of defines are for packet headers, and chip register + * and memory bits that are visible to and/or used by user-mode software + * The other bits that are used only by the driver or diags are in + * ipath_registers.h + */ + +/* RcvHdrFlags bits */ +#define INFINIPATH_RHF_LENGTH_MASK 0x7FF +#define INFINIPATH_RHF_LENGTH_SHIFT 0 +#define INFINIPATH_RHF_RCVTYPE_MASK 0x7 +#define INFINIPATH_RHF_RCVTYPE_SHIFT 11 +#define INFINIPATH_RHF_EGRINDEX_MASK 0xFFF +#define INFINIPATH_RHF_EGRINDEX_SHIFT 16 +#define INFINIPATH_RHF_SEQ_MASK 0xF +#define INFINIPATH_RHF_SEQ_SHIFT 0 +#define INFINIPATH_RHF_HDRQ_OFFSET_MASK 0x7FF +#define INFINIPATH_RHF_HDRQ_OFFSET_SHIFT 4 +#define INFINIPATH_RHF_H_ICRCERR 0x80000000 +#define INFINIPATH_RHF_H_VCRCERR 0x40000000 +#define INFINIPATH_RHF_H_PARITYERR 0x20000000 +#define INFINIPATH_RHF_H_LENERR 0x10000000 +#define INFINIPATH_RHF_H_MTUERR 0x08000000 +#define INFINIPATH_RHF_H_IHDRERR 0x04000000 +#define INFINIPATH_RHF_H_TIDERR 0x02000000 +#define INFINIPATH_RHF_H_MKERR 0x01000000 +#define INFINIPATH_RHF_H_IBERR 0x00800000 +#define INFINIPATH_RHF_H_ERR_MASK 0xFF800000 +#define INFINIPATH_RHF_L_USE_EGR 0x80000000 +#define INFINIPATH_RHF_L_SWA 0x00008000 +#define INFINIPATH_RHF_L_SWB 0x00004000 + +/* infinipath header fields */ +#define INFINIPATH_I_VERS_MASK 0xF +#define INFINIPATH_I_VERS_SHIFT 28 +#define INFINIPATH_I_PORT_MASK 0xF +#define INFINIPATH_I_PORT_SHIFT 24 +#define INFINIPATH_I_TID_MASK 0x7FF +#define INFINIPATH_I_TID_SHIFT 13 +#define INFINIPATH_I_OFFSET_MASK 0x1FFF +#define INFINIPATH_I_OFFSET_SHIFT 0 + +/* K_PktFlags bits */ +#define INFINIPATH_KPF_INTR 0x1 +#define INFINIPATH_KPF_SUBPORT_MASK 0x3 +#define INFINIPATH_KPF_SUBPORT_SHIFT 1 + +#define INFINIPATH_MAX_SUBPORT 4 + +/* SendPIO per-buffer control */ +#define INFINIPATH_SP_TEST 0x40 +#define INFINIPATH_SP_TESTEBP 0x20 +#define INFINIPATH_SP_TRIGGER_SHIFT 15 + +/* SendPIOAvail bits */ +#define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1 +#define INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 0 + +/* infinipath header format */ +struct ipath_header { + /* + * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset - + * 14 bits before ECO change ~28 Dec 03. After that, Vers 4, + * Port 4, TID 11, offset 13. + */ + __le32 ver_port_tid_offset; + __le16 chksum; + __le16 pkt_flags; +}; + +/* infinipath user message header format. + * This structure contains the first 4 fields common to all protocols + * that employ infinipath. + */ +struct ipath_message_header { + __be16 lrh[4]; + __be32 bth[3]; + /* fields below this point are in host byte order */ + struct ipath_header iph; + __u8 sub_opcode; +}; + +/* infinipath ethernet header format */ +struct ether_header { + __be16 lrh[4]; + __be32 bth[3]; + struct ipath_header iph; + __u8 sub_opcode; + __u8 cmd; + __be16 lid; + __u16 mac[3]; + __u8 frag_num; + __u8 seq_num; + __le32 len; + /* MUST be of word size due to PIO write requirements */ + __le32 csum; + __le16 csum_offset; + __le16 flags; + __u16 first_2_bytes; + __u8 unused[2]; /* currently unused */ +}; + + +/* IB - LRH header consts */ +#define IPATH_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */ +#define IPATH_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */ + +/* misc. */ +#define SIZE_OF_CRC 1 + +#define IPATH_DEFAULT_P_KEY 0xFFFF +#define IPATH_PERMISSIVE_LID 0xFFFF +#define IPATH_AETH_CREDIT_SHIFT 24 +#define IPATH_AETH_CREDIT_MASK 0x1F +#define IPATH_AETH_CREDIT_INVAL 0x1F +#define IPATH_PSN_MASK 0xFFFFFF +#define IPATH_MSN_MASK 0xFFFFFF +#define IPATH_QPN_MASK 0xFFFFFF +#define IPATH_MULTICAST_LID_BASE 0xC000 +#define IPATH_EAGER_TID_ID INFINIPATH_I_TID_MASK +#define IPATH_MULTICAST_QPN 0xFFFFFF + +/* Receive Header Queue: receive type (from infinipath) */ +#define RCVHQ_RCV_TYPE_EXPECTED 0 +#define RCVHQ_RCV_TYPE_EAGER 1 +#define RCVHQ_RCV_TYPE_NON_KD 2 +#define RCVHQ_RCV_TYPE_ERROR 3 + + +/* sub OpCodes - ith4x */ +#define IPATH_ITH4X_OPCODE_ENCAP 0x81 +#define IPATH_ITH4X_OPCODE_LID_ARP 0x82 + +#define IPATH_HEADER_QUEUE_WORDS 9 + +/* functions for extracting fields from rcvhdrq entries for the driver. + */ +static inline __u32 ipath_hdrget_err_flags(const __le32 * rbuf) +{ + return __le32_to_cpu(rbuf[1]) & INFINIPATH_RHF_H_ERR_MASK; +} + +static inline __u32 ipath_hdrget_rcv_type(const __le32 * rbuf) +{ + return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT) + & INFINIPATH_RHF_RCVTYPE_MASK; +} + +static inline __u32 ipath_hdrget_length_in_bytes(const __le32 * rbuf) +{ + return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT) + & INFINIPATH_RHF_LENGTH_MASK) << 2; +} + +static inline __u32 ipath_hdrget_index(const __le32 * rbuf) +{ + return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT) + & INFINIPATH_RHF_EGRINDEX_MASK; +} + +static inline __u32 ipath_hdrget_seq(const __le32 *rbuf) +{ + return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_SEQ_SHIFT) + & INFINIPATH_RHF_SEQ_MASK; +} + +static inline __u32 ipath_hdrget_offset(const __le32 *rbuf) +{ + return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_HDRQ_OFFSET_SHIFT) + & INFINIPATH_RHF_HDRQ_OFFSET_MASK; +} + +static inline __u32 ipath_hdrget_use_egr_buf(const __le32 *rbuf) +{ + return __le32_to_cpu(rbuf[0]) & INFINIPATH_RHF_L_USE_EGR; +} + +static inline __u32 ipath_hdrget_ipath_ver(__le32 hdrword) +{ + return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT) + & INFINIPATH_I_VERS_MASK; +} + +#endif /* _IPATH_COMMON_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c new file mode 100644 index 00000000..0416c6c0 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -0,0 +1,478 @@ +/* + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "ipath_verbs.h" + +/** + * ipath_cq_enter - add a new entry to the completion queue + * @cq: completion queue + * @entry: work completion entry to add + * @sig: true if @entry is a solicitated entry + * + * This may be called with qp->s_lock held. + */ +void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) +{ + struct ipath_cq_wc *wc; + unsigned long flags; + u32 head; + u32 next; + + spin_lock_irqsave(&cq->lock, flags); + + /* + * Note that the head pointer might be writable by user processes. + * Take care to verify it is a sane value. + */ + wc = cq->queue; + head = wc->head; + if (head >= (unsigned) cq->ibcq.cqe) { + head = cq->ibcq.cqe; + next = 0; + } else + next = head + 1; + if (unlikely(next == wc->tail)) { + spin_unlock_irqrestore(&cq->lock, flags); + if (cq->ibcq.event_handler) { + struct ib_event ev; + + ev.device = cq->ibcq.device; + ev.element.cq = &cq->ibcq; + ev.event = IB_EVENT_CQ_ERR; + cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); + } + return; + } + if (cq->ip) { + wc->uqueue[head].wr_id = entry->wr_id; + wc->uqueue[head].status = entry->status; + wc->uqueue[head].opcode = entry->opcode; + wc->uqueue[head].vendor_err = entry->vendor_err; + wc->uqueue[head].byte_len = entry->byte_len; + wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data; + wc->uqueue[head].qp_num = entry->qp->qp_num; + wc->uqueue[head].src_qp = entry->src_qp; + wc->uqueue[head].wc_flags = entry->wc_flags; + wc->uqueue[head].pkey_index = entry->pkey_index; + wc->uqueue[head].slid = entry->slid; + wc->uqueue[head].sl = entry->sl; + wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; + wc->uqueue[head].port_num = entry->port_num; + /* Make sure entry is written before the head index. */ + smp_wmb(); + } else + wc->kqueue[head] = *entry; + wc->head = next; + + if (cq->notify == IB_CQ_NEXT_COMP || + (cq->notify == IB_CQ_SOLICITED && solicited)) { + cq->notify = IB_CQ_NONE; + cq->triggered++; + /* + * This will cause send_complete() to be called in + * another thread. + */ + tasklet_hi_schedule(&cq->comptask); + } + + spin_unlock_irqrestore(&cq->lock, flags); + + if (entry->status != IB_WC_SUCCESS) + to_idev(cq->ibcq.device)->n_wqe_errs++; +} + +/** + * ipath_poll_cq - poll for work completion entries + * @ibcq: the completion queue to poll + * @num_entries: the maximum number of entries to return + * @entry: pointer to array where work completions are placed + * + * Returns the number of completion entries polled. + * + * This may be called from interrupt context. Also called by ib_poll_cq() + * in the generic verbs code. + */ +int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) +{ + struct ipath_cq *cq = to_icq(ibcq); + struct ipath_cq_wc *wc; + unsigned long flags; + int npolled; + u32 tail; + + /* The kernel can only poll a kernel completion queue */ + if (cq->ip) { + npolled = -EINVAL; + goto bail; + } + + spin_lock_irqsave(&cq->lock, flags); + + wc = cq->queue; + tail = wc->tail; + if (tail > (u32) cq->ibcq.cqe) + tail = (u32) cq->ibcq.cqe; + for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { + if (tail == wc->head) + break; + /* The kernel doesn't need a RMB since it has the lock. */ + *entry = wc->kqueue[tail]; + if (tail >= cq->ibcq.cqe) + tail = 0; + else + tail++; + } + wc->tail = tail; + + spin_unlock_irqrestore(&cq->lock, flags); + +bail: + return npolled; +} + +static void send_complete(unsigned long data) +{ + struct ipath_cq *cq = (struct ipath_cq *)data; + + /* + * The completion handler will most likely rearm the notification + * and poll for all pending entries. If a new completion entry + * is added while we are in this routine, tasklet_hi_schedule() + * won't call us again until we return so we check triggered to + * see if we need to call the handler again. + */ + for (;;) { + u8 triggered = cq->triggered; + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); + + if (cq->triggered == triggered) + return; + } +} + +/** + * ipath_create_cq - create a completion queue + * @ibdev: the device this completion queue is attached to + * @entries: the minimum size of the completion queue + * @context: unused by the InfiniPath driver + * @udata: unused by the InfiniPath driver + * + * Returns a pointer to the completion queue or negative errno values + * for failure. + * + * Called by ib_create_cq() in the generic verbs code. + */ +struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_cq *cq; + struct ipath_cq_wc *wc; + struct ib_cq *ret; + u32 sz; + + if (entries < 1 || entries > ib_ipath_max_cqes) { + ret = ERR_PTR(-EINVAL); + goto done; + } + + /* Allocate the completion queue structure. */ + cq = kmalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) { + ret = ERR_PTR(-ENOMEM); + goto done; + } + + /* + * Allocate the completion queue entries and head/tail pointers. + * This is allocated separately so that it can be resized and + * also mapped into user space. + * We need to use vmalloc() in order to support mmap and large + * numbers of entries. + */ + sz = sizeof(*wc); + if (udata && udata->outlen >= sizeof(__u64)) + sz += sizeof(struct ib_uverbs_wc) * (entries + 1); + else + sz += sizeof(struct ib_wc) * (entries + 1); + wc = vmalloc_user(sz); + if (!wc) { + ret = ERR_PTR(-ENOMEM); + goto bail_cq; + } + + /* + * Return the address of the WC as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + int err; + + cq->ip = ipath_create_mmap_info(dev, sz, context, wc); + if (!cq->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wc; + } + + err = ib_copy_to_udata(udata, &cq->ip->offset, + sizeof(cq->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } else + cq->ip = NULL; + + spin_lock(&dev->n_cqs_lock); + if (dev->n_cqs_allocated == ib_ipath_max_cqs) { + spin_unlock(&dev->n_cqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_cqs_allocated++; + spin_unlock(&dev->n_cqs_lock); + + if (cq->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + /* + * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. + * The number of entries should be >= the number requested or return + * an error. + */ + cq->ibcq.cqe = entries; + cq->notify = IB_CQ_NONE; + cq->triggered = 0; + spin_lock_init(&cq->lock); + tasklet_init(&cq->comptask, send_complete, (unsigned long)cq); + wc->head = 0; + wc->tail = 0; + cq->queue = wc; + + ret = &cq->ibcq; + + goto done; + +bail_ip: + kfree(cq->ip); +bail_wc: + vfree(wc); +bail_cq: + kfree(cq); +done: + return ret; +} + +/** + * ipath_destroy_cq - destroy a completion queue + * @ibcq: the completion queue to destroy. + * + * Returns 0 for success. + * + * Called by ib_destroy_cq() in the generic verbs code. + */ +int ipath_destroy_cq(struct ib_cq *ibcq) +{ + struct ipath_ibdev *dev = to_idev(ibcq->device); + struct ipath_cq *cq = to_icq(ibcq); + + tasklet_kill(&cq->comptask); + spin_lock(&dev->n_cqs_lock); + dev->n_cqs_allocated--; + spin_unlock(&dev->n_cqs_lock); + if (cq->ip) + kref_put(&cq->ip->ref, ipath_release_mmap_info); + else + vfree(cq->queue); + kfree(cq); + + return 0; +} + +/** + * ipath_req_notify_cq - change the notification type for a completion queue + * @ibcq: the completion queue + * @notify_flags: the type of notification to request + * + * Returns 0 for success. + * + * This may be called from interrupt context. Also called by + * ib_req_notify_cq() in the generic verbs code. + */ +int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) +{ + struct ipath_cq *cq = to_icq(ibcq); + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&cq->lock, flags); + /* + * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow + * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2). + */ + if (cq->notify != IB_CQ_NEXT_COMP) + cq->notify = notify_flags & IB_CQ_SOLICITED_MASK; + + if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && + cq->queue->head != cq->queue->tail) + ret = 1; + + spin_unlock_irqrestore(&cq->lock, flags); + + return ret; +} + +/** + * ipath_resize_cq - change the size of the CQ + * @ibcq: the completion queue + * + * Returns 0 for success. + */ +int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) +{ + struct ipath_cq *cq = to_icq(ibcq); + struct ipath_cq_wc *old_wc; + struct ipath_cq_wc *wc; + u32 head, tail, n; + int ret; + u32 sz; + + if (cqe < 1 || cqe > ib_ipath_max_cqes) { + ret = -EINVAL; + goto bail; + } + + /* + * Need to use vmalloc() if we want to support large #s of entries. + */ + sz = sizeof(*wc); + if (udata && udata->outlen >= sizeof(__u64)) + sz += sizeof(struct ib_uverbs_wc) * (cqe + 1); + else + sz += sizeof(struct ib_wc) * (cqe + 1); + wc = vmalloc_user(sz); + if (!wc) { + ret = -ENOMEM; + goto bail; + } + + /* Check that we can write the offset to mmap. */ + if (udata && udata->outlen >= sizeof(__u64)) { + __u64 offset = 0; + + ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (ret) + goto bail_free; + } + + spin_lock_irq(&cq->lock); + /* + * Make sure head and tail are sane since they + * might be user writable. + */ + old_wc = cq->queue; + head = old_wc->head; + if (head > (u32) cq->ibcq.cqe) + head = (u32) cq->ibcq.cqe; + tail = old_wc->tail; + if (tail > (u32) cq->ibcq.cqe) + tail = (u32) cq->ibcq.cqe; + if (head < tail) + n = cq->ibcq.cqe + 1 + head - tail; + else + n = head - tail; + if (unlikely((u32)cqe < n)) { + ret = -EINVAL; + goto bail_unlock; + } + for (n = 0; tail != head; n++) { + if (cq->ip) + wc->uqueue[n] = old_wc->uqueue[tail]; + else + wc->kqueue[n] = old_wc->kqueue[tail]; + if (tail == (u32) cq->ibcq.cqe) + tail = 0; + else + tail++; + } + cq->ibcq.cqe = cqe; + wc->head = n; + wc->tail = 0; + cq->queue = wc; + spin_unlock_irq(&cq->lock); + + vfree(old_wc); + + if (cq->ip) { + struct ipath_ibdev *dev = to_idev(ibcq->device); + struct ipath_mmap_info *ip = cq->ip; + + ipath_update_mmap_info(dev, ip, sz, wc); + + /* + * Return the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + + spin_lock_irq(&dev->pending_lock); + if (list_empty(&ip->pending_mmaps)) + list_add(&ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + ret = 0; + goto bail; + +bail_unlock: + spin_unlock_irq(&cq->lock); +bail_free: + vfree(wc); +bail: + return ret; +} diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h new file mode 100644 index 00000000..65926cd3 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_debug.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _IPATH_DEBUG_H +#define _IPATH_DEBUG_H + +#ifndef _IPATH_DEBUGGING /* debugging enabled or not */ +#define _IPATH_DEBUGGING 1 +#endif + +#if _IPATH_DEBUGGING + +/* + * Mask values for debugging. The scheme allows us to compile out any + * of the debug tracing stuff, and if compiled in, to enable or disable + * dynamically. This can be set at modprobe time also: + * modprobe infinipath.ko infinipath_debug=7 + */ + +#define __IPATH_INFO 0x1 /* generic low verbosity stuff */ +#define __IPATH_DBG 0x2 /* generic debug */ +#define __IPATH_TRSAMPLE 0x8 /* generate trace buffer sample entries */ +/* leave some low verbosity spots open */ +#define __IPATH_VERBDBG 0x40 /* very verbose debug */ +#define __IPATH_PKTDBG 0x80 /* print packet data */ +/* print process startup (init)/exit messages */ +#define __IPATH_PROCDBG 0x100 +/* print mmap/fault stuff, not using VDBG any more */ +#define __IPATH_MMDBG 0x200 +#define __IPATH_ERRPKTDBG 0x400 +#define __IPATH_USER_SEND 0x1000 /* use user mode send */ +#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */ +#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */ +#define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) gen debug */ +#define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings */ +#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */ +#define __IPATH_IPATHPD 0x80000 /* Ethernet (IPATH) packet dump */ +#define __IPATH_IPATHTABLE 0x100000 /* Ethernet (IPATH) table dump */ +#define __IPATH_LINKVERBDBG 0x200000 /* very verbose linkchange debug */ + +#else /* _IPATH_DEBUGGING */ + +/* + * define all of these even with debugging off, for the few places that do + * if(infinipath_debug & _IPATH_xyzzy), but in a way that will make the + * compiler eliminate the code + */ + +#define __IPATH_INFO 0x0 /* generic low verbosity stuff */ +#define __IPATH_DBG 0x0 /* generic debug */ +#define __IPATH_TRSAMPLE 0x0 /* generate trace buffer sample entries */ +#define __IPATH_VERBDBG 0x0 /* very verbose debug */ +#define __IPATH_PKTDBG 0x0 /* print packet data */ +#define __IPATH_PROCDBG 0x0 /* process startup (init)/exit messages */ +/* print mmap/fault stuff, not using VDBG any more */ +#define __IPATH_MMDBG 0x0 +#define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */ +#define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */ +#define __IPATH_IPATHWARN 0x0 /* Ethernet (IPATH) warnings on */ +#define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors on */ +#define __IPATH_IPATHPD 0x0 /* Ethernet (IPATH) packet dump on */ +#define __IPATH_IPATHTABLE 0x0 /* Ethernet (IPATH) packet dump on */ +#define __IPATH_LINKVERBDBG 0x0 /* very verbose linkchange debug */ + +#endif /* _IPATH_DEBUGGING */ + +#define __IPATH_VERBOSEDBG __IPATH_VERBDBG + +#endif /* _IPATH_DEBUG_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c new file mode 100644 index 00000000..714293b7 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -0,0 +1,563 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This file contains support for diagnostic functions. It is accessed by + * opening the ipath_diag device, normally minor number 129. Diagnostic use + * of the InfiniPath chip may render the chip or board unusable until the + * driver is unloaded, or in some cases, until the system is rebooted. + * + * Accesses to the chip through this interface are not similar to going + * through the /sys/bus/pci resource mmap interface. + */ + +#include +#include +#include +#include +#include +#include + +#include "ipath_kernel.h" +#include "ipath_common.h" + +int ipath_diag_inuse; +static int diag_set_link; + +static int ipath_diag_open(struct inode *in, struct file *fp); +static int ipath_diag_release(struct inode *in, struct file *fp); +static ssize_t ipath_diag_read(struct file *fp, char __user *data, + size_t count, loff_t *off); +static ssize_t ipath_diag_write(struct file *fp, const char __user *data, + size_t count, loff_t *off); + +static const struct file_operations diag_file_ops = { + .owner = THIS_MODULE, + .write = ipath_diag_write, + .read = ipath_diag_read, + .open = ipath_diag_open, + .release = ipath_diag_release, + .llseek = default_llseek, +}; + +static ssize_t ipath_diagpkt_write(struct file *fp, + const char __user *data, + size_t count, loff_t *off); + +static const struct file_operations diagpkt_file_ops = { + .owner = THIS_MODULE, + .write = ipath_diagpkt_write, + .llseek = noop_llseek, +}; + +static atomic_t diagpkt_count = ATOMIC_INIT(0); +static struct cdev *diagpkt_cdev; +static struct device *diagpkt_dev; + +int ipath_diag_add(struct ipath_devdata *dd) +{ + char name[16]; + int ret = 0; + + if (atomic_inc_return(&diagpkt_count) == 1) { + ret = ipath_cdev_init(IPATH_DIAGPKT_MINOR, + "ipath_diagpkt", &diagpkt_file_ops, + &diagpkt_cdev, &diagpkt_dev); + + if (ret) { + ipath_dev_err(dd, "Couldn't create ipath_diagpkt " + "device: %d", ret); + goto done; + } + } + + snprintf(name, sizeof(name), "ipath_diag%d", dd->ipath_unit); + + ret = ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name, + &diag_file_ops, &dd->diag_cdev, + &dd->diag_dev); + if (ret) + ipath_dev_err(dd, "Couldn't create %s device: %d", + name, ret); + +done: + return ret; +} + +void ipath_diag_remove(struct ipath_devdata *dd) +{ + if (atomic_dec_and_test(&diagpkt_count)) + ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_dev); + + ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_dev); +} + +/** + * ipath_read_umem64 - read a 64-bit quantity from the chip into user space + * @dd: the infinipath device + * @uaddr: the location to store the data in user memory + * @caddr: the source chip address (full pointer, not offset) + * @count: number of bytes to copy (multiple of 32 bits) + * + * This function also localizes all chip memory accesses. + * The copy should be written such that we read full cacheline packets + * from the chip. This is usually used for a single qword + * + * NOTE: This assumes the chip address is 64-bit aligned. + */ +static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr, + const void __iomem *caddr, size_t count) +{ + const u64 __iomem *reg_addr = caddr; + const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64)); + int ret; + + /* not very efficient, but it works for now */ + if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) { + ret = -EINVAL; + goto bail; + } + while (reg_addr < reg_end) { + u64 data = readq(reg_addr); + if (copy_to_user(uaddr, &data, sizeof(u64))) { + ret = -EFAULT; + goto bail; + } + reg_addr++; + uaddr += sizeof(u64); + } + ret = 0; +bail: + return ret; +} + +/** + * ipath_write_umem64 - write a 64-bit quantity to the chip from user space + * @dd: the infinipath device + * @caddr: the destination chip address (full pointer, not offset) + * @uaddr: the source of the data in user memory + * @count: the number of bytes to copy (multiple of 32 bits) + * + * This is usually used for a single qword + * NOTE: This assumes the chip address is 64-bit aligned. + */ + +static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr, + const void __user *uaddr, size_t count) +{ + u64 __iomem *reg_addr = caddr; + const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64)); + int ret; + + /* not very efficient, but it works for now */ + if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) { + ret = -EINVAL; + goto bail; + } + while (reg_addr < reg_end) { + u64 data; + if (copy_from_user(&data, uaddr, sizeof(data))) { + ret = -EFAULT; + goto bail; + } + writeq(data, reg_addr); + + reg_addr++; + uaddr += sizeof(u64); + } + ret = 0; +bail: + return ret; +} + +/** + * ipath_read_umem32 - read a 32-bit quantity from the chip into user space + * @dd: the infinipath device + * @uaddr: the location to store the data in user memory + * @caddr: the source chip address (full pointer, not offset) + * @count: number of bytes to copy + * + * read 32 bit values, not 64 bit; for memories that only + * support 32 bit reads; usually a single dword. + */ +static int ipath_read_umem32(struct ipath_devdata *dd, void __user *uaddr, + const void __iomem *caddr, size_t count) +{ + const u32 __iomem *reg_addr = caddr; + const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32)); + int ret; + + if (reg_addr < (u32 __iomem *) dd->ipath_kregbase || + reg_end > (u32 __iomem *) dd->ipath_kregend) { + ret = -EINVAL; + goto bail; + } + /* not very efficient, but it works for now */ + while (reg_addr < reg_end) { + u32 data = readl(reg_addr); + if (copy_to_user(uaddr, &data, sizeof(data))) { + ret = -EFAULT; + goto bail; + } + + reg_addr++; + uaddr += sizeof(u32); + + } + ret = 0; +bail: + return ret; +} + +/** + * ipath_write_umem32 - write a 32-bit quantity to the chip from user space + * @dd: the infinipath device + * @caddr: the destination chip address (full pointer, not offset) + * @uaddr: the source of the data in user memory + * @count: number of bytes to copy + * + * write 32 bit values, not 64 bit; for memories that only + * support 32 bit write; usually a single dword. + */ + +static int ipath_write_umem32(struct ipath_devdata *dd, void __iomem *caddr, + const void __user *uaddr, size_t count) +{ + u32 __iomem *reg_addr = caddr; + const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32)); + int ret; + + if (reg_addr < (u32 __iomem *) dd->ipath_kregbase || + reg_end > (u32 __iomem *) dd->ipath_kregend) { + ret = -EINVAL; + goto bail; + } + while (reg_addr < reg_end) { + u32 data; + if (copy_from_user(&data, uaddr, sizeof(data))) { + ret = -EFAULT; + goto bail; + } + writel(data, reg_addr); + + reg_addr++; + uaddr += sizeof(u32); + } + ret = 0; +bail: + return ret; +} + +static int ipath_diag_open(struct inode *in, struct file *fp) +{ + int unit = iminor(in) - IPATH_DIAG_MINOR_BASE; + struct ipath_devdata *dd; + int ret; + + mutex_lock(&ipath_mutex); + + if (ipath_diag_inuse) { + ret = -EBUSY; + goto bail; + } + + dd = ipath_lookup(unit); + + if (dd == NULL || !(dd->ipath_flags & IPATH_PRESENT) || + !dd->ipath_kregbase) { + ret = -ENODEV; + goto bail; + } + + fp->private_data = dd; + ipath_diag_inuse = -2; + diag_set_link = 0; + ret = 0; + + /* Only expose a way to reset the device if we + make it into diag mode. */ + ipath_expose_reset(&dd->pcidev->dev); + +bail: + mutex_unlock(&ipath_mutex); + + return ret; +} + +/** + * ipath_diagpkt_write - write an IB packet + * @fp: the diag data device file pointer + * @data: ipath_diag_pkt structure saying where to get the packet + * @count: size of data to write + * @off: unused by this code + */ +static ssize_t ipath_diagpkt_write(struct file *fp, + const char __user *data, + size_t count, loff_t *off) +{ + u32 __iomem *piobuf; + u32 plen, clen, pbufn; + struct ipath_diag_pkt odp; + struct ipath_diag_xpkt dp; + u32 *tmpbuf = NULL; + struct ipath_devdata *dd; + ssize_t ret = 0; + u64 val; + u32 l_state, lt_state; /* LinkState, LinkTrainingState */ + + if (count < sizeof(odp)) { + ret = -EINVAL; + goto bail; + } + + if (count == sizeof(dp)) { + if (copy_from_user(&dp, data, sizeof(dp))) { + ret = -EFAULT; + goto bail; + } + } else if (copy_from_user(&odp, data, sizeof(odp))) { + ret = -EFAULT; + goto bail; + } + + /* + * Due to padding/alignment issues (lessened with new struct) + * the old and new structs are the same length. We need to + * disambiguate them, which we can do because odp.len has never + * been less than the total of LRH+BTH+DETH so far, while + * dp.unit (same offset) unit is unlikely to get that high. + * Similarly, dp.data, the pointer to user at the same offset + * as odp.unit, is almost certainly at least one (512byte)page + * "above" NULL. The if-block below can be omitted if compatibility + * between a new driver and older diagnostic code is unimportant. + * compatibility the other direction (new diags, old driver) is + * handled in the diagnostic code, with a warning. + */ + if (dp.unit >= 20 && dp.data < 512) { + /* very probable version mismatch. Fix it up */ + memcpy(&odp, &dp, sizeof(odp)); + /* We got a legacy dp, copy elements to dp */ + dp.unit = odp.unit; + dp.data = odp.data; + dp.len = odp.len; + dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */ + } + + /* send count must be an exact number of dwords */ + if (dp.len & 3) { + ret = -EINVAL; + goto bail; + } + + clen = dp.len >> 2; + + dd = ipath_lookup(dp.unit); + if (!dd || !(dd->ipath_flags & IPATH_PRESENT) || + !dd->ipath_kregbase) { + ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n", + dp.unit); + ret = -ENODEV; + goto bail; + } + + if (ipath_diag_inuse && !diag_set_link && + !(dd->ipath_flags & IPATH_LINKACTIVE)) { + diag_set_link = 1; + ipath_cdbg(VERBOSE, "Trying to set to set link active for " + "diag pkt\n"); + ipath_set_linkstate(dd, IPATH_IB_LINKARM); + ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE); + } + + if (!(dd->ipath_flags & IPATH_INITTED)) { + /* no hardware, freeze, etc. */ + ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit); + ret = -ENODEV; + goto bail; + } + /* + * Want to skip check for l_state if using custom PBC, + * because we might be trying to force an SM packet out. + * first-cut, skip _all_ state checking in that case. + */ + val = ipath_ib_state(dd, dd->ipath_lastibcstat); + lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat); + l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat); + if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP || + (val != dd->ib_init && val != dd->ib_arm && + val != dd->ib_active))) { + ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n", + dd->ipath_unit, (unsigned long long) val); + ret = -EINVAL; + goto bail; + } + + /* need total length before first word written */ + /* +1 word is for the qword padding */ + plen = sizeof(u32) + dp.len; + + if ((plen + 4) > dd->ipath_ibmaxlen) { + ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n", + plen - 4, dd->ipath_ibmaxlen); + ret = -EINVAL; + goto bail; /* before writing pbc */ + } + tmpbuf = vmalloc(plen); + if (!tmpbuf) { + dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, " + "failing\n"); + ret = -ENOMEM; + goto bail; + } + + if (copy_from_user(tmpbuf, + (const void __user *) (unsigned long) dp.data, + dp.len)) { + ret = -EFAULT; + goto bail; + } + + plen >>= 2; /* in dwords */ + + piobuf = ipath_getpiobuf(dd, plen, &pbufn); + if (!piobuf) { + ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n", + dd->ipath_unit); + ret = -EBUSY; + goto bail; + } + /* disarm it just to be extra sure */ + ipath_disarm_piobufs(dd, pbufn, 1); + + if (ipath_debug & __IPATH_PKTDBG) + ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n", + dd->ipath_unit, plen - 1, pbufn); + + if (dp.pbc_wd == 0) + dp.pbc_wd = plen; + writeq(dp.pbc_wd, piobuf); + /* + * Copy all by the trigger word, then flush, so it's written + * to chip before trigger word, then write trigger word, then + * flush again, so packet is sent. + */ + if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) { + ipath_flush_wc(); + __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1); + ipath_flush_wc(); + __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + } else + __iowrite32_copy(piobuf + 2, tmpbuf, clen); + + ipath_flush_wc(); + + ret = sizeof(dp); + +bail: + vfree(tmpbuf); + return ret; +} + +static int ipath_diag_release(struct inode *in, struct file *fp) +{ + mutex_lock(&ipath_mutex); + ipath_diag_inuse = 0; + fp->private_data = NULL; + mutex_unlock(&ipath_mutex); + return 0; +} + +static ssize_t ipath_diag_read(struct file *fp, char __user *data, + size_t count, loff_t *off) +{ + struct ipath_devdata *dd = fp->private_data; + void __iomem *kreg_base; + ssize_t ret; + + kreg_base = dd->ipath_kregbase; + + if (count == 0) + ret = 0; + else if ((count % 4) || (*off % 4)) + /* address or length is not 32-bit aligned, hence invalid */ + ret = -EINVAL; + else if (ipath_diag_inuse < 1 && (*off || count != 8)) + ret = -EINVAL; /* prevent cat /dev/ipath_diag* */ + else if ((count % 8) || (*off % 8)) + /* address or length not 64-bit aligned; do 32-bit reads */ + ret = ipath_read_umem32(dd, data, kreg_base + *off, count); + else + ret = ipath_read_umem64(dd, data, kreg_base + *off, count); + + if (ret >= 0) { + *off += count; + ret = count; + if (ipath_diag_inuse == -2) + ipath_diag_inuse++; + } + + return ret; +} + +static ssize_t ipath_diag_write(struct file *fp, const char __user *data, + size_t count, loff_t *off) +{ + struct ipath_devdata *dd = fp->private_data; + void __iomem *kreg_base; + ssize_t ret; + + kreg_base = dd->ipath_kregbase; + + if (count == 0) + ret = 0; + else if ((count % 4) || (*off % 4)) + /* address or length is not 32-bit aligned, hence invalid */ + ret = -EINVAL; + else if ((ipath_diag_inuse == -1 && (*off || count != 8)) || + ipath_diag_inuse == -2) /* read qw off 0, write qw off 0 */ + ret = -EINVAL; /* before any other write allowed */ + else if ((count % 8) || (*off % 8)) + /* address or length not 64-bit aligned; do 32-bit writes */ + ret = ipath_write_umem32(dd, kreg_base + *off, data, count); + else + ret = ipath_write_umem64(dd, kreg_base + *off, data, count); + + if (ret >= 0) { + *off += count; + ret = count; + if (ipath_diag_inuse == -1) + ipath_diag_inuse = 1; /* all read/write OK now */ + } + + return ret; +} diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c new file mode 100644 index 00000000..644c2c74 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_dma.c @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2006 QLogic, Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "ipath_verbs.h" + +#define BAD_DMA_ADDRESS ((u64) 0) + +/* + * The following functions implement driver specific replacements + * for the ib_dma_*() functions. + * + * These functions return kernel virtual addresses instead of + * device bus addresses since the driver uses the CPU to copy + * data instead of using hardware DMA. + */ + +static int ipath_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + return dma_addr == BAD_DMA_ADDRESS; +} + +static u64 ipath_dma_map_single(struct ib_device *dev, + void *cpu_addr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); + return (u64) cpu_addr; +} + +static void ipath_dma_unmap_single(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} + +static u64 ipath_dma_map_page(struct ib_device *dev, + struct page *page, + unsigned long offset, + size_t size, + enum dma_data_direction direction) +{ + u64 addr; + + BUG_ON(!valid_dma_direction(direction)); + + if (offset + size > PAGE_SIZE) { + addr = BAD_DMA_ADDRESS; + goto done; + } + + addr = (u64) page_address(page); + if (addr) + addr += offset; + /* TODO: handle highmem pages */ + +done: + return addr; +} + +static void ipath_dma_unmap_page(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} + +static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) +{ + struct scatterlist *sg; + u64 addr; + int i; + int ret = nents; + + BUG_ON(!valid_dma_direction(direction)); + + for_each_sg(sgl, sg, nents, i) { + addr = (u64) page_address(sg_page(sg)); + /* TODO: handle highmem pages */ + if (!addr) { + ret = 0; + break; + } + } + return ret; +} + +static void ipath_unmap_sg(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} + +static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) +{ + u64 addr = (u64) page_address(sg_page(sg)); + + if (addr) + addr += sg->offset; + return addr; +} + +static unsigned int ipath_sg_dma_len(struct ib_device *dev, + struct scatterlist *sg) +{ + return sg->length; +} + +static void ipath_sync_single_for_cpu(struct ib_device *dev, + u64 addr, + size_t size, + enum dma_data_direction dir) +{ +} + +static void ipath_sync_single_for_device(struct ib_device *dev, + u64 addr, + size_t size, + enum dma_data_direction dir) +{ +} + +static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size, + u64 *dma_handle, gfp_t flag) +{ + struct page *p; + void *addr = NULL; + + p = alloc_pages(flag, get_order(size)); + if (p) + addr = page_address(p); + if (dma_handle) + *dma_handle = (u64) addr; + return addr; +} + +static void ipath_dma_free_coherent(struct ib_device *dev, size_t size, + void *cpu_addr, u64 dma_handle) +{ + free_pages((unsigned long) cpu_addr, get_order(size)); +} + +struct ib_dma_mapping_ops ipath_dma_mapping_ops = { + ipath_mapping_error, + ipath_dma_map_single, + ipath_dma_unmap_single, + ipath_dma_map_page, + ipath_dma_unmap_page, + ipath_map_sg, + ipath_unmap_sg, + ipath_sg_dma_address, + ipath_sg_dma_len, + ipath_sync_single_for_cpu, + ipath_sync_single_for_device, + ipath_dma_alloc_coherent, + ipath_dma_free_coherent +}; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c new file mode 100644 index 00000000..bfca37b2 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -0,0 +1,2789 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ipath_kernel.h" +#include "ipath_verbs.h" + +static void ipath_update_pio_bufs(struct ipath_devdata *); + +const char *ipath_get_unit_name(int unit) +{ + static char iname[16]; + snprintf(iname, sizeof iname, "infinipath%u", unit); + return iname; +} + +#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: " +#define PFX IPATH_DRV_NAME ": " + +/* + * The size has to be longer than this string, so we can append + * board/chip information to it in the init code. + */ +const char ib_ipath_version[] = IPATH_IDSTR "\n"; + +static struct idr unit_table; +DEFINE_SPINLOCK(ipath_devs_lock); +LIST_HEAD(ipath_dev_list); + +wait_queue_head_t ipath_state_wait; + +unsigned ipath_debug = __IPATH_INFO; + +module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(debug, "mask for debug prints"); +EXPORT_SYMBOL_GPL(ipath_debug); + +unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */ +module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO); +MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported"); + +static unsigned ipath_hol_timeout_ms = 13000; +module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO); +MODULE_PARM_DESC(hol_timeout_ms, + "duration of user app suspension after link failure"); + +unsigned ipath_linkrecovery = 1; +module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue"); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("QLogic "); +MODULE_DESCRIPTION("QLogic InfiniPath driver"); + +/* + * Table to translate the LINKTRAININGSTATE portion of + * IBCStatus to a human-readable form. + */ +const char *ipath_ibcstatus_str[] = { + "Disabled", + "LinkUp", + "PollActive", + "PollQuiet", + "SleepDelay", + "SleepQuiet", + "LState6", /* unused */ + "LState7", /* unused */ + "CfgDebounce", + "CfgRcvfCfg", + "CfgWaitRmt", + "CfgIdle", + "RecovRetrain", + "CfgTxRevLane", /* unused before IBA7220 */ + "RecovWaitRmt", + "RecovIdle", + /* below were added for IBA7220 */ + "CfgEnhanced", + "CfgTest", + "CfgWaitRmtTest", + "CfgWaitCfgEnhanced", + "SendTS_T", + "SendTstIdles", + "RcvTS_T", + "SendTst_TS1s", + "LTState18", "LTState19", "LTState1A", "LTState1B", + "LTState1C", "LTState1D", "LTState1E", "LTState1F" +}; + +static void __devexit ipath_remove_one(struct pci_dev *); +static int __devinit ipath_init_one(struct pci_dev *, + const struct pci_device_id *); + +/* Only needed for registration, nothing else needs this info */ +#define PCI_VENDOR_ID_PATHSCALE 0x1fc1 +#define PCI_DEVICE_ID_INFINIPATH_HT 0xd + +/* Number of seconds before our card status check... */ +#define STATUS_TIMEOUT 60 + +static const struct pci_device_id ipath_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) }, + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, ipath_pci_tbl); + +static struct pci_driver ipath_driver = { + .name = IPATH_DRV_NAME, + .probe = ipath_init_one, + .remove = __devexit_p(ipath_remove_one), + .id_table = ipath_pci_tbl, + .driver = { + .groups = ipath_driver_attr_groups, + }, +}; + +static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev, + u32 *bar0, u32 *bar1) +{ + int ret; + + ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0); + if (ret) + ipath_dev_err(dd, "failed to read bar0 before enable: " + "error %d\n", -ret); + + ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1); + if (ret) + ipath_dev_err(dd, "failed to read bar1 before enable: " + "error %d\n", -ret); + + ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1); +} + +static void ipath_free_devdata(struct pci_dev *pdev, + struct ipath_devdata *dd) +{ + unsigned long flags; + + pci_set_drvdata(pdev, NULL); + + if (dd->ipath_unit != -1) { + spin_lock_irqsave(&ipath_devs_lock, flags); + idr_remove(&unit_table, dd->ipath_unit); + list_del(&dd->ipath_list); + spin_unlock_irqrestore(&ipath_devs_lock, flags); + } + vfree(dd); +} + +static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) +{ + unsigned long flags; + struct ipath_devdata *dd; + int ret; + + if (!idr_pre_get(&unit_table, GFP_KERNEL)) { + dd = ERR_PTR(-ENOMEM); + goto bail; + } + + dd = vzalloc(sizeof(*dd)); + if (!dd) { + dd = ERR_PTR(-ENOMEM); + goto bail; + } + dd->ipath_unit = -1; + + spin_lock_irqsave(&ipath_devs_lock, flags); + + ret = idr_get_new(&unit_table, dd, &dd->ipath_unit); + if (ret < 0) { + printk(KERN_ERR IPATH_DRV_NAME + ": Could not allocate unit ID: error %d\n", -ret); + ipath_free_devdata(pdev, dd); + dd = ERR_PTR(ret); + goto bail_unlock; + } + + dd->pcidev = pdev; + pci_set_drvdata(pdev, dd); + + list_add(&dd->ipath_list, &ipath_dev_list); + +bail_unlock: + spin_unlock_irqrestore(&ipath_devs_lock, flags); + +bail: + return dd; +} + +static inline struct ipath_devdata *__ipath_lookup(int unit) +{ + return idr_find(&unit_table, unit); +} + +struct ipath_devdata *ipath_lookup(int unit) +{ + struct ipath_devdata *dd; + unsigned long flags; + + spin_lock_irqsave(&ipath_devs_lock, flags); + dd = __ipath_lookup(unit); + spin_unlock_irqrestore(&ipath_devs_lock, flags); + + return dd; +} + +int ipath_count_units(int *npresentp, int *nupp, int *maxportsp) +{ + int nunits, npresent, nup; + struct ipath_devdata *dd; + unsigned long flags; + int maxports; + + nunits = npresent = nup = maxports = 0; + + spin_lock_irqsave(&ipath_devs_lock, flags); + + list_for_each_entry(dd, &ipath_dev_list, ipath_list) { + nunits++; + if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase) + npresent++; + if (dd->ipath_lid && + !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN + | IPATH_LINKUNK))) + nup++; + if (dd->ipath_cfgports > maxports) + maxports = dd->ipath_cfgports; + } + + spin_unlock_irqrestore(&ipath_devs_lock, flags); + + if (npresentp) + *npresentp = npresent; + if (nupp) + *nupp = nup; + if (maxportsp) + *maxportsp = maxports; + + return nunits; +} + +/* + * These next two routines are placeholders in case we don't have per-arch + * code for controlling write combining. If explicit control of write + * combining is not available, performance will probably be awful. + */ + +int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd) +{ + return -EOPNOTSUPP; +} + +void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd) +{ +} + +/* + * Perform a PIO buffer bandwidth write test, to verify proper system + * configuration. Even when all the setup calls work, occasionally + * BIOS or other issues can prevent write combining from working, or + * can cause other bandwidth problems to the chip. + * + * This test simply writes the same buffer over and over again, and + * measures close to the peak bandwidth to the chip (not testing + * data bandwidth to the wire). On chips that use an address-based + * trigger to send packets to the wire, this is easy. On chips that + * use a count to trigger, we want to make sure that the packet doesn't + * go out on the wire, or trigger flow control checks. + */ +static void ipath_verify_pioperf(struct ipath_devdata *dd) +{ + u32 pbnum, cnt, lcnt; + u32 __iomem *piobuf; + u32 *addr; + u64 msecs, emsecs; + + piobuf = ipath_getpiobuf(dd, 0, &pbnum); + if (!piobuf) { + dev_info(&dd->pcidev->dev, + "No PIObufs for checking perf, skipping\n"); + return; + } + + /* + * Enough to give us a reasonable test, less than piobuf size, and + * likely multiple of store buffer length. + */ + cnt = 1024; + + addr = vmalloc(cnt); + if (!addr) { + dev_info(&dd->pcidev->dev, + "Couldn't get memory for checking PIO perf," + " skipping\n"); + goto done; + } + + preempt_disable(); /* we want reasonably accurate elapsed time */ + msecs = 1 + jiffies_to_msecs(jiffies); + for (lcnt = 0; lcnt < 10000U; lcnt++) { + /* wait until we cross msec boundary */ + if (jiffies_to_msecs(jiffies) >= msecs) + break; + udelay(1); + } + + ipath_disable_armlaunch(dd); + + /* + * length 0, no dwords actually sent, and mark as VL15 + * on chips where that may matter (due to IB flowcontrol) + */ + if ((dd->ipath_flags & IPATH_HAS_PBC_CNT)) + writeq(1UL << 63, piobuf); + else + writeq(0, piobuf); + ipath_flush_wc(); + + /* + * this is only roughly accurate, since even with preempt we + * still take interrupts that could take a while. Running for + * >= 5 msec seems to get us "close enough" to accurate values + */ + msecs = jiffies_to_msecs(jiffies); + for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) { + __iowrite32_copy(piobuf + 64, addr, cnt >> 2); + emsecs = jiffies_to_msecs(jiffies) - msecs; + } + + /* 1 GiB/sec, slightly over IB SDR line rate */ + if (lcnt < (emsecs * 1024U)) + ipath_dev_err(dd, + "Performance problem: bandwidth to PIO buffers is " + "only %u MiB/sec\n", + lcnt / (u32) emsecs); + else + ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n", + lcnt / (u32) emsecs); + + preempt_enable(); + + vfree(addr); + +done: + /* disarm piobuf, so it's available again */ + ipath_disarm_piobufs(dd, pbnum, 1); + ipath_enable_armlaunch(dd); +} + +static void cleanup_device(struct ipath_devdata *dd); + +static int __devinit ipath_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int ret, len, j; + struct ipath_devdata *dd; + unsigned long long addr; + u32 bar0 = 0, bar1 = 0; + + dd = ipath_alloc_devdata(pdev); + if (IS_ERR(dd)) { + ret = PTR_ERR(dd); + printk(KERN_ERR IPATH_DRV_NAME + ": Could not allocate devdata: error %d\n", -ret); + goto bail; + } + + ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit); + + ret = pci_enable_device(pdev); + if (ret) { + /* This can happen iff: + * + * We did a chip reset, and then failed to reprogram the + * BAR, or the chip reset due to an internal error. We then + * unloaded the driver and reloaded it. + * + * Both reset cases set the BAR back to initial state. For + * the latter case, the AER sticky error bit at offset 0x718 + * should be set, but the Linux kernel doesn't yet know + * about that, it appears. If the original BAR was retained + * in the kernel data structures, this may be OK. + */ + ipath_dev_err(dd, "enable unit %d failed: error %d\n", + dd->ipath_unit, -ret); + goto bail_devdata; + } + addr = pci_resource_start(pdev, 0); + len = pci_resource_len(pdev, 0); + ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x " + "driver_data %lx\n", addr, len, pdev->irq, ent->vendor, + ent->device, ent->driver_data); + + read_bars(dd, pdev, &bar0, &bar1); + + if (!bar1 && !(bar0 & ~0xf)) { + if (addr) { + dev_info(&pdev->dev, "BAR is 0 (probable RESET), " + "rewriting as %llx\n", addr); + ret = pci_write_config_dword( + pdev, PCI_BASE_ADDRESS_0, addr); + if (ret) { + ipath_dev_err(dd, "rewrite of BAR0 " + "failed: err %d\n", -ret); + goto bail_disable; + } + ret = pci_write_config_dword( + pdev, PCI_BASE_ADDRESS_1, addr >> 32); + if (ret) { + ipath_dev_err(dd, "rewrite of BAR1 " + "failed: err %d\n", -ret); + goto bail_disable; + } + } else { + ipath_dev_err(dd, "BAR is 0 (probable RESET), " + "not usable until reboot\n"); + ret = -ENODEV; + goto bail_disable; + } + } + + ret = pci_request_regions(pdev, IPATH_DRV_NAME); + if (ret) { + dev_info(&pdev->dev, "pci_request_regions unit %u fails: " + "err %d\n", dd->ipath_unit, -ret); + goto bail_disable; + } + + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (ret) { + /* + * if the 64 bit setup fails, try 32 bit. Some systems + * do not setup 64 bit maps on systems with 2GB or less + * memory installed. + */ + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (ret) { + dev_info(&pdev->dev, + "Unable to set DMA mask for unit %u: %d\n", + dd->ipath_unit, ret); + goto bail_regions; + } + else { + ipath_dbg("No 64bit DMA mask, used 32 bit mask\n"); + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (ret) + dev_info(&pdev->dev, + "Unable to set DMA consistent mask " + "for unit %u: %d\n", + dd->ipath_unit, ret); + + } + } + else { + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (ret) + dev_info(&pdev->dev, + "Unable to set DMA consistent mask " + "for unit %u: %d\n", + dd->ipath_unit, ret); + } + + pci_set_master(pdev); + + /* + * Save BARs to rewrite after device reset. Save all 64 bits of + * BAR, just in case. + */ + dd->ipath_pcibar0 = addr; + dd->ipath_pcibar1 = addr >> 32; + dd->ipath_deviceid = ent->device; /* save for later use */ + dd->ipath_vendorid = ent->vendor; + + /* setup the chip-specific functions, as early as possible. */ + switch (ent->device) { + case PCI_DEVICE_ID_INFINIPATH_HT: + ipath_init_iba6110_funcs(dd); + break; + + default: + ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " + "failing\n", ent->device); + return -ENODEV; + } + + for (j = 0; j < 6; j++) { + if (!pdev->resource[j].start) + continue; + ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n", + j, &pdev->resource[j], + (unsigned long long)pci_resource_len(pdev, j)); + } + + if (!addr) { + ipath_dev_err(dd, "No valid address in BAR 0!\n"); + ret = -ENODEV; + goto bail_regions; + } + + dd->ipath_pcirev = pdev->revision; + +#if defined(__powerpc__) + /* There isn't a generic way to specify writethrough mappings */ + dd->ipath_kregbase = __ioremap(addr, len, + (_PAGE_NO_CACHE|_PAGE_WRITETHRU)); +#else + dd->ipath_kregbase = ioremap_nocache(addr, len); +#endif + + if (!dd->ipath_kregbase) { + ipath_dbg("Unable to map io addr %llx to kvirt, failing\n", + addr); + ret = -ENOMEM; + goto bail_iounmap; + } + dd->ipath_kregend = (u64 __iomem *) + ((void __iomem *)dd->ipath_kregbase + len); + dd->ipath_physaddr = addr; /* used for io_remap, etc. */ + /* for user mmap */ + ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n", + addr, dd->ipath_kregbase); + + if (dd->ipath_f_bus(dd, pdev)) + ipath_dev_err(dd, "Failed to setup config space; " + "continuing anyway\n"); + + /* + * set up our interrupt handler; IRQF_SHARED probably not needed, + * since MSI interrupts shouldn't be shared but won't hurt for now. + * check 0 irq after we return from chip-specific bus setup, since + * that can affect this due to setup + */ + if (!dd->ipath_irq) + ipath_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " + "work\n"); + else { + ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED, + IPATH_DRV_NAME, dd); + if (ret) { + ipath_dev_err(dd, "Couldn't setup irq handler, " + "irq=%d: %d\n", dd->ipath_irq, ret); + goto bail_iounmap; + } + } + + ret = ipath_init_chip(dd, 0); /* do the chip-specific init */ + if (ret) + goto bail_irqsetup; + + ret = ipath_enable_wc(dd); + + if (ret) { + ipath_dev_err(dd, "Write combining not enabled " + "(err %d): performance may be poor\n", + -ret); + ret = 0; + } + + ipath_verify_pioperf(dd); + + ipath_device_create_group(&pdev->dev, dd); + ipathfs_add_device(dd); + ipath_user_add(dd); + ipath_diag_add(dd); + ipath_register_ib_device(dd); + + goto bail; + +bail_irqsetup: + cleanup_device(dd); + + if (dd->ipath_irq) + dd->ipath_f_free_irq(dd); + + if (dd->ipath_f_cleanup) + dd->ipath_f_cleanup(dd); + +bail_iounmap: + iounmap((volatile void __iomem *) dd->ipath_kregbase); + +bail_regions: + pci_release_regions(pdev); + +bail_disable: + pci_disable_device(pdev); + +bail_devdata: + ipath_free_devdata(pdev, dd); + +bail: + return ret; +} + +static void cleanup_device(struct ipath_devdata *dd) +{ + int port; + struct ipath_portdata **tmp; + unsigned long flags; + + if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { + /* can't do anything more with chip; needs re-init */ + *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; + if (dd->ipath_kregbase) { + /* + * if we haven't already cleaned up before these are + * to ensure any register reads/writes "fail" until + * re-init + */ + dd->ipath_kregbase = NULL; + dd->ipath_uregbase = 0; + dd->ipath_sregbase = 0; + dd->ipath_cregbase = 0; + dd->ipath_kregsize = 0; + } + ipath_disable_wc(dd); + } + + if (dd->ipath_spectriggerhit) + dev_info(&dd->pcidev->dev, "%lu special trigger hits\n", + dd->ipath_spectriggerhit); + + if (dd->ipath_pioavailregs_dma) { + dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, + (void *) dd->ipath_pioavailregs_dma, + dd->ipath_pioavailregs_phys); + dd->ipath_pioavailregs_dma = NULL; + } + if (dd->ipath_dummy_hdrq) { + dma_free_coherent(&dd->pcidev->dev, + dd->ipath_pd[0]->port_rcvhdrq_size, + dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys); + dd->ipath_dummy_hdrq = NULL; + } + + if (dd->ipath_pageshadow) { + struct page **tmpp = dd->ipath_pageshadow; + dma_addr_t *tmpd = dd->ipath_physshadow; + int i, cnt = 0; + + ipath_cdbg(VERBOSE, "Unlocking any expTID pages still " + "locked\n"); + for (port = 0; port < dd->ipath_cfgports; port++) { + int port_tidbase = port * dd->ipath_rcvtidcnt; + int maxtid = port_tidbase + dd->ipath_rcvtidcnt; + for (i = port_tidbase; i < maxtid; i++) { + if (!tmpp[i]) + continue; + pci_unmap_page(dd->pcidev, tmpd[i], + PAGE_SIZE, PCI_DMA_FROMDEVICE); + ipath_release_user_pages(&tmpp[i], 1); + tmpp[i] = NULL; + cnt++; + } + } + if (cnt) { + ipath_stats.sps_pageunlocks += cnt; + ipath_cdbg(VERBOSE, "There were still %u expTID " + "entries locked\n", cnt); + } + if (ipath_stats.sps_pagelocks || + ipath_stats.sps_pageunlocks) + ipath_cdbg(VERBOSE, "%llu pages locked, %llu " + "unlocked via ipath_m{un}lock\n", + (unsigned long long) + ipath_stats.sps_pagelocks, + (unsigned long long) + ipath_stats.sps_pageunlocks); + + ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", + dd->ipath_pageshadow); + tmpp = dd->ipath_pageshadow; + dd->ipath_pageshadow = NULL; + vfree(tmpp); + + dd->ipath_egrtidbase = NULL; + } + + /* + * free any resources still in use (usually just kernel ports) + * at unload; we do for portcnt, because that's what we allocate. + * We acquire lock to be really paranoid that ipath_pd isn't being + * accessed from some interrupt-related code (that should not happen, + * but best to be sure). + */ + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); + tmp = dd->ipath_pd; + dd->ipath_pd = NULL; + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); + for (port = 0; port < dd->ipath_portcnt; port++) { + struct ipath_portdata *pd = tmp[port]; + tmp[port] = NULL; /* debugging paranoia */ + ipath_free_pddata(dd, pd); + } + kfree(tmp); +} + +static void __devexit ipath_remove_one(struct pci_dev *pdev) +{ + struct ipath_devdata *dd = pci_get_drvdata(pdev); + + ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd); + + /* + * disable the IB link early, to be sure no new packets arrive, which + * complicates the shutdown process + */ + ipath_shutdown_device(dd); + + flush_workqueue(ib_wq); + + if (dd->verbs_dev) + ipath_unregister_ib_device(dd->verbs_dev); + + ipath_diag_remove(dd); + ipath_user_remove(dd); + ipathfs_remove_device(dd); + ipath_device_remove_group(&pdev->dev, dd); + + ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, " + "unit %u\n", dd, (u32) dd->ipath_unit); + + cleanup_device(dd); + + /* + * turn off rcv, send, and interrupts for all ports, all drivers + * should also hard reset the chip here? + * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs + * for all versions of the driver, if they were allocated + */ + if (dd->ipath_irq) { + ipath_cdbg(VERBOSE, "unit %u free irq %d\n", + dd->ipath_unit, dd->ipath_irq); + dd->ipath_f_free_irq(dd); + } else + ipath_dbg("irq is 0, not doing free_irq " + "for unit %u\n", dd->ipath_unit); + /* + * we check for NULL here, because it's outside + * the kregbase check, and we need to call it + * after the free_irq. Thus it's possible that + * the function pointers were never initialized. + */ + if (dd->ipath_f_cleanup) + /* clean up chip-specific stuff */ + dd->ipath_f_cleanup(dd); + + ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase); + iounmap((volatile void __iomem *) dd->ipath_kregbase); + pci_release_regions(pdev); + ipath_cdbg(VERBOSE, "calling pci_disable_device\n"); + pci_disable_device(pdev); + + ipath_free_devdata(pdev, dd); +} + +/* general driver use */ +DEFINE_MUTEX(ipath_mutex); + +static DEFINE_SPINLOCK(ipath_pioavail_lock); + +/** + * ipath_disarm_piobufs - cancel a range of PIO buffers + * @dd: the infinipath device + * @first: the first PIO buffer to cancel + * @cnt: the number of PIO buffers to cancel + * + * cancel a range of PIO buffers, used when they might be armed, but + * not triggered. Used at init to ensure buffer state, and also user + * process close, in case it died while writing to a PIO buffer + * Also after errors. + */ +void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, + unsigned cnt) +{ + unsigned i, last = first + cnt; + unsigned long flags; + + ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first); + for (i = first; i < last; i++) { + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + /* + * The disarm-related bits are write-only, so it + * is ok to OR them in with our copy of sendctrl + * while we hold the lock. + */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl | INFINIPATH_S_DISARM | + (i << INFINIPATH_S_DISARMPIOBUF_SHIFT)); + /* can't disarm bufs back-to-back per iba7220 spec */ + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + } + /* on some older chips, update may not happen after cancel */ + ipath_force_pio_avail_update(dd); +} + +/** + * ipath_wait_linkstate - wait for an IB link state change to occur + * @dd: the infinipath device + * @state: the state to wait for + * @msecs: the number of milliseconds to wait + * + * wait up to msecs milliseconds for IB link state change to occur for + * now, take the easy polling route. Currently used only by + * ipath_set_linkstate. Returns 0 if state reached, otherwise + * -ETIMEDOUT state can have multiple states set, for any of several + * transitions. + */ +int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) +{ + dd->ipath_state_wanted = state; + wait_event_interruptible_timeout(ipath_state_wait, + (dd->ipath_flags & state), + msecs_to_jiffies(msecs)); + dd->ipath_state_wanted = 0; + + if (!(dd->ipath_flags & state)) { + u64 val; + ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u" + " ms\n", + /* test INIT ahead of DOWN, both can be set */ + (state & IPATH_LINKINIT) ? "INIT" : + ((state & IPATH_LINKDOWN) ? "DOWN" : + ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")), + msecs); + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); + ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n", + (unsigned long long) ipath_read_kreg64( + dd, dd->ipath_kregs->kr_ibcctrl), + (unsigned long long) val, + ipath_ibcstatus_str[val & dd->ibcs_lts_mask]); + } + return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT; +} + +static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err, + char *buf, size_t blen) +{ + static const struct { + ipath_err_t err; + const char *msg; + } errs[] = { + { INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" }, + { INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" }, + { INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" }, + { INFINIPATH_E_SDMABASE, "SDmaBase" }, + { INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" }, + { INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" }, + { INFINIPATH_E_SDMADWEN, "SDmaDwEn" }, + { INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" }, + { INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" }, + { INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" }, + { INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" }, + { INFINIPATH_E_SDMADISABLED, "SDmaDisabled" }, + }; + int i; + int expected; + size_t bidx = 0; + + for (i = 0; i < ARRAY_SIZE(errs); i++) { + expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 : + test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); + if ((err & errs[i].err) && !expected) + bidx += snprintf(buf + bidx, blen - bidx, + "%s ", errs[i].msg); + } +} + +/* + * Decode the error status into strings, deciding whether to always + * print * it or not depending on "normal packet errors" vs everything + * else. Return 1 if "real" errors, otherwise 0 if only packet + * errors, so caller can decide what to print with the string. + */ +int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen, + ipath_err_t err) +{ + int iserr = 1; + *buf = '\0'; + if (err & INFINIPATH_E_PKTERRS) { + if (!(err & ~INFINIPATH_E_PKTERRS)) + iserr = 0; // if only packet errors. + if (ipath_debug & __IPATH_ERRPKTDBG) { + if (err & INFINIPATH_E_REBP) + strlcat(buf, "EBP ", blen); + if (err & INFINIPATH_E_RVCRC) + strlcat(buf, "VCRC ", blen); + if (err & INFINIPATH_E_RICRC) { + strlcat(buf, "CRC ", blen); + // clear for check below, so only once + err &= INFINIPATH_E_RICRC; + } + if (err & INFINIPATH_E_RSHORTPKTLEN) + strlcat(buf, "rshortpktlen ", blen); + if (err & INFINIPATH_E_SDROPPEDDATAPKT) + strlcat(buf, "sdroppeddatapkt ", blen); + if (err & INFINIPATH_E_SPKTLEN) + strlcat(buf, "spktlen ", blen); + } + if ((err & INFINIPATH_E_RICRC) && + !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP))) + strlcat(buf, "CRC ", blen); + if (!iserr) + goto done; + } + if (err & INFINIPATH_E_RHDRLEN) + strlcat(buf, "rhdrlen ", blen); + if (err & INFINIPATH_E_RBADTID) + strlcat(buf, "rbadtid ", blen); + if (err & INFINIPATH_E_RBADVERSION) + strlcat(buf, "rbadversion ", blen); + if (err & INFINIPATH_E_RHDR) + strlcat(buf, "rhdr ", blen); + if (err & INFINIPATH_E_SENDSPECIALTRIGGER) + strlcat(buf, "sendspecialtrigger ", blen); + if (err & INFINIPATH_E_RLONGPKTLEN) + strlcat(buf, "rlongpktlen ", blen); + if (err & INFINIPATH_E_RMAXPKTLEN) + strlcat(buf, "rmaxpktlen ", blen); + if (err & INFINIPATH_E_RMINPKTLEN) + strlcat(buf, "rminpktlen ", blen); + if (err & INFINIPATH_E_SMINPKTLEN) + strlcat(buf, "sminpktlen ", blen); + if (err & INFINIPATH_E_RFORMATERR) + strlcat(buf, "rformaterr ", blen); + if (err & INFINIPATH_E_RUNSUPVL) + strlcat(buf, "runsupvl ", blen); + if (err & INFINIPATH_E_RUNEXPCHAR) + strlcat(buf, "runexpchar ", blen); + if (err & INFINIPATH_E_RIBFLOW) + strlcat(buf, "ribflow ", blen); + if (err & INFINIPATH_E_SUNDERRUN) + strlcat(buf, "sunderrun ", blen); + if (err & INFINIPATH_E_SPIOARMLAUNCH) + strlcat(buf, "spioarmlaunch ", blen); + if (err & INFINIPATH_E_SUNEXPERRPKTNUM) + strlcat(buf, "sunexperrpktnum ", blen); + if (err & INFINIPATH_E_SDROPPEDSMPPKT) + strlcat(buf, "sdroppedsmppkt ", blen); + if (err & INFINIPATH_E_SMAXPKTLEN) + strlcat(buf, "smaxpktlen ", blen); + if (err & INFINIPATH_E_SUNSUPVL) + strlcat(buf, "sunsupVL ", blen); + if (err & INFINIPATH_E_INVALIDADDR) + strlcat(buf, "invalidaddr ", blen); + if (err & INFINIPATH_E_RRCVEGRFULL) + strlcat(buf, "rcvegrfull ", blen); + if (err & INFINIPATH_E_RRCVHDRFULL) + strlcat(buf, "rcvhdrfull ", blen); + if (err & INFINIPATH_E_IBSTATUSCHANGED) + strlcat(buf, "ibcstatuschg ", blen); + if (err & INFINIPATH_E_RIBLOSTLINK) + strlcat(buf, "riblostlink ", blen); + if (err & INFINIPATH_E_HARDWARE) + strlcat(buf, "hardware ", blen); + if (err & INFINIPATH_E_RESET) + strlcat(buf, "reset ", blen); + if (err & INFINIPATH_E_SDMAERRS) + decode_sdma_errs(dd, err, buf, blen); + if (err & INFINIPATH_E_INVALIDEEPCMD) + strlcat(buf, "invalideepromcmd ", blen); +done: + return iserr; +} + +/** + * get_rhf_errstring - decode RHF errors + * @err: the err number + * @msg: the output buffer + * @len: the length of the output buffer + * + * only used one place now, may want more later + */ +static void get_rhf_errstring(u32 err, char *msg, size_t len) +{ + /* if no errors, and so don't need to check what's first */ + *msg = '\0'; + + if (err & INFINIPATH_RHF_H_ICRCERR) + strlcat(msg, "icrcerr ", len); + if (err & INFINIPATH_RHF_H_VCRCERR) + strlcat(msg, "vcrcerr ", len); + if (err & INFINIPATH_RHF_H_PARITYERR) + strlcat(msg, "parityerr ", len); + if (err & INFINIPATH_RHF_H_LENERR) + strlcat(msg, "lenerr ", len); + if (err & INFINIPATH_RHF_H_MTUERR) + strlcat(msg, "mtuerr ", len); + if (err & INFINIPATH_RHF_H_IHDRERR) + /* infinipath hdr checksum error */ + strlcat(msg, "ipathhdrerr ", len); + if (err & INFINIPATH_RHF_H_TIDERR) + strlcat(msg, "tiderr ", len); + if (err & INFINIPATH_RHF_H_MKERR) + /* bad port, offset, etc. */ + strlcat(msg, "invalid ipathhdr ", len); + if (err & INFINIPATH_RHF_H_IBERR) + strlcat(msg, "iberr ", len); + if (err & INFINIPATH_RHF_L_SWA) + strlcat(msg, "swA ", len); + if (err & INFINIPATH_RHF_L_SWB) + strlcat(msg, "swB ", len); +} + +/** + * ipath_get_egrbuf - get an eager buffer + * @dd: the infinipath device + * @bufnum: the eager buffer to get + * + * must only be called if ipath_pd[port] is known to be allocated + */ +static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum) +{ + return dd->ipath_port0_skbinfo ? + (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL; +} + +/** + * ipath_alloc_skb - allocate an skb and buffer with possible constraints + * @dd: the infinipath device + * @gfp_mask: the sk_buff SFP mask + */ +struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, + gfp_t gfp_mask) +{ + struct sk_buff *skb; + u32 len; + + /* + * Only fully supported way to handle this is to allocate lots + * extra, align as needed, and then do skb_reserve(). That wastes + * a lot of memory... I'll have to hack this into infinipath_copy + * also. + */ + + /* + * We need 2 extra bytes for ipath_ether data sent in the + * key header. In order to keep everything dword aligned, + * we'll reserve 4 bytes. + */ + len = dd->ipath_ibmaxlen + 4; + + if (dd->ipath_flags & IPATH_4BYTE_TID) { + /* We need a 2KB multiple alignment, and there is no way + * to do it except to allocate extra and then skb_reserve + * enough to bring it up to the right alignment. + */ + len += 2047; + } + + skb = __dev_alloc_skb(len, gfp_mask); + if (!skb) { + ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n", + len); + goto bail; + } + + skb_reserve(skb, 4); + + if (dd->ipath_flags & IPATH_4BYTE_TID) { + u32 una = (unsigned long)skb->data & 2047; + if (una) + skb_reserve(skb, 2048 - una); + } + +bail: + return skb; +} + +static void ipath_rcv_hdrerr(struct ipath_devdata *dd, + u32 eflags, + u32 l, + u32 etail, + __le32 *rhf_addr, + struct ipath_message_header *hdr) +{ + char emsg[128]; + + get_rhf_errstring(eflags, emsg, sizeof emsg); + ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u " + "tlen=%x opcode=%x egridx=%x: %s\n", + eflags, l, + ipath_hdrget_rcv_type(rhf_addr), + ipath_hdrget_length_in_bytes(rhf_addr), + be32_to_cpu(hdr->bth[0]) >> 24, + etail, emsg); + + /* Count local link integrity errors. */ + if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) { + u8 n = (dd->ipath_ibcctrl >> + INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; + + if (++dd->ipath_lli_counter > n) { + dd->ipath_lli_counter = 0; + dd->ipath_lli_errors++; + } + } +} + +/* + * ipath_kreceive - receive a packet + * @pd: the infinipath port + * + * called from interrupt handler for errors or receive interrupt + */ +void ipath_kreceive(struct ipath_portdata *pd) +{ + struct ipath_devdata *dd = pd->port_dd; + __le32 *rhf_addr; + void *ebuf; + const u32 rsize = dd->ipath_rcvhdrentsize; /* words */ + const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */ + u32 etail = -1, l, hdrqtail; + struct ipath_message_header *hdr; + u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0; + static u64 totcalls; /* stats, may eventually remove */ + int last; + + l = pd->port_head; + rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset; + if (dd->ipath_flags & IPATH_NODMA_RTAIL) { + u32 seq = ipath_hdrget_seq(rhf_addr); + + if (seq != pd->port_seq_cnt) + goto bail; + hdrqtail = 0; + } else { + hdrqtail = ipath_get_rcvhdrtail(pd); + if (l == hdrqtail) + goto bail; + smp_rmb(); + } + +reloop: + for (last = 0, i = 1; !last; i += !last) { + hdr = dd->ipath_f_get_msgheader(dd, rhf_addr); + eflags = ipath_hdrget_err_flags(rhf_addr); + etype = ipath_hdrget_rcv_type(rhf_addr); + /* total length */ + tlen = ipath_hdrget_length_in_bytes(rhf_addr); + ebuf = NULL; + if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ? + ipath_hdrget_use_egr_buf(rhf_addr) : + (etype != RCVHQ_RCV_TYPE_EXPECTED)) { + /* + * It turns out that the chip uses an eager buffer + * for all non-expected packets, whether it "needs" + * one or not. So always get the index, but don't + * set ebuf (so we try to copy data) unless the + * length requires it. + */ + etail = ipath_hdrget_index(rhf_addr); + updegr = 1; + if (tlen > sizeof(*hdr) || + etype == RCVHQ_RCV_TYPE_NON_KD) + ebuf = ipath_get_egrbuf(dd, etail); + } + + /* + * both tiderr and ipathhdrerr are set for all plain IB + * packets; only ipathhdrerr should be set. + */ + + if (etype != RCVHQ_RCV_TYPE_NON_KD && + etype != RCVHQ_RCV_TYPE_ERROR && + ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) != + IPS_PROTO_VERSION) + ipath_cdbg(PKT, "Bad InfiniPath protocol version " + "%x\n", etype); + + if (unlikely(eflags)) + ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr); + else if (etype == RCVHQ_RCV_TYPE_NON_KD) { + ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen); + if (dd->ipath_lli_counter) + dd->ipath_lli_counter--; + } else if (etype == RCVHQ_RCV_TYPE_EAGER) { + u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24; + u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff; + ipath_cdbg(PKT, "typ %x, opcode %x (eager, " + "qp=%x), len %x; ignored\n", + etype, opcode, qp, tlen); + } + else if (etype == RCVHQ_RCV_TYPE_EXPECTED) + ipath_dbg("Bug: Expected TID, opcode %x; ignored\n", + be32_to_cpu(hdr->bth[0]) >> 24); + else { + /* + * error packet, type of error unknown. + * Probably type 3, but we don't know, so don't + * even try to print the opcode, etc. + * Usually caused by a "bad packet", that has no + * BTH, when the LRH says it should. + */ + ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf" + " %x, len %x hdrq+%x rhf: %Lx\n", + etail, tlen, l, (unsigned long long) + le64_to_cpu(*(__le64 *) rhf_addr)); + if (ipath_debug & __IPATH_ERRPKTDBG) { + u32 j, *d, dw = rsize-2; + if (rsize > (tlen>>2)) + dw = tlen>>2; + d = (u32 *)hdr; + printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n", + dw); + for (j = 0; j < dw; j++) + printk(KERN_DEBUG "%8x%s", d[j], + (j%8) == 7 ? "\n" : " "); + printk(KERN_DEBUG ".\n"); + } + } + l += rsize; + if (l >= maxcnt) + l = 0; + rhf_addr = (__le32 *) pd->port_rcvhdrq + + l + dd->ipath_rhf_offset; + if (dd->ipath_flags & IPATH_NODMA_RTAIL) { + u32 seq = ipath_hdrget_seq(rhf_addr); + + if (++pd->port_seq_cnt > 13) + pd->port_seq_cnt = 1; + if (seq != pd->port_seq_cnt) + last = 1; + } else if (l == hdrqtail) + last = 1; + /* + * update head regs on last packet, and every 16 packets. + * Reduce bus traffic, while still trying to prevent + * rcvhdrq overflows, for when the queue is nearly full + */ + if (last || !(i & 0xf)) { + u64 lval = l; + + /* request IBA6120 and 7220 interrupt only on last */ + if (last) + lval |= dd->ipath_rhdrhead_intr_off; + ipath_write_ureg(dd, ur_rcvhdrhead, lval, + pd->port_port); + if (updegr) { + ipath_write_ureg(dd, ur_rcvegrindexhead, + etail, pd->port_port); + updegr = 0; + } + } + } + + if (!dd->ipath_rhdrhead_intr_off && !reloop && + !(dd->ipath_flags & IPATH_NODMA_RTAIL)) { + /* IBA6110 workaround; we can have a race clearing chip + * interrupt with another interrupt about to be delivered, + * and can clear it before it is delivered on the GPIO + * workaround. By doing the extra check here for the + * in-memory tail register updating while we were doing + * earlier packets, we "almost" guarantee we have covered + * that case. + */ + u32 hqtail = ipath_get_rcvhdrtail(pd); + if (hqtail != hdrqtail) { + hdrqtail = hqtail; + reloop = 1; /* loop 1 extra time at most */ + goto reloop; + } + } + + pkttot += i; + + pd->port_head = l; + + if (pkttot > ipath_stats.sps_maxpkts_call) + ipath_stats.sps_maxpkts_call = pkttot; + ipath_stats.sps_port0pkts += pkttot; + ipath_stats.sps_avgpkts_call = + ipath_stats.sps_port0pkts / ++totcalls; + +bail:; +} + +/** + * ipath_update_pio_bufs - update shadow copy of the PIO availability map + * @dd: the infinipath device + * + * called whenever our local copy indicates we have run out of send buffers + * NOTE: This can be called from interrupt context by some code + * and from non-interrupt context by ipath_getpiobuf(). + */ + +static void ipath_update_pio_bufs(struct ipath_devdata *dd) +{ + unsigned long flags; + int i; + const unsigned piobregs = (unsigned)dd->ipath_pioavregs; + + /* If the generation (check) bits have changed, then we update the + * busy bit for the corresponding PIO buffer. This algorithm will + * modify positions to the value they already have in some cases + * (i.e., no change), but it's faster than changing only the bits + * that have changed. + * + * We would like to do this atomicly, to avoid spinlocks in the + * critical send path, but that's not really possible, given the + * type of changes, and that this routine could be called on + * multiple cpu's simultaneously, so we lock in this routine only, + * to avoid conflicting updates; all we change is the shadow, and + * it's a single 64 bit memory location, so by definition the update + * is atomic in terms of what other cpu's can see in testing the + * bits. The spin_lock overhead isn't too bad, since it only + * happens when all buffers are in use, so only cpu overhead, not + * latency or bandwidth is affected. + */ + if (!dd->ipath_pioavailregs_dma) { + ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n"); + return; + } + if (ipath_debug & __IPATH_VERBDBG) { + /* only if packet debug and verbose */ + volatile __le64 *dma = dd->ipath_pioavailregs_dma; + unsigned long *shadow = dd->ipath_pioavailshadow; + + ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, " + "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx " + "s3=%lx\n", + (unsigned long long) le64_to_cpu(dma[0]), + shadow[0], + (unsigned long long) le64_to_cpu(dma[1]), + shadow[1], + (unsigned long long) le64_to_cpu(dma[2]), + shadow[2], + (unsigned long long) le64_to_cpu(dma[3]), + shadow[3]); + if (piobregs > 4) + ipath_cdbg( + PKT, "2nd group, dma4=%llx shad4=%lx, " + "d5=%llx s5=%lx, d6=%llx s6=%lx, " + "d7=%llx s7=%lx\n", + (unsigned long long) le64_to_cpu(dma[4]), + shadow[4], + (unsigned long long) le64_to_cpu(dma[5]), + shadow[5], + (unsigned long long) le64_to_cpu(dma[6]), + shadow[6], + (unsigned long long) le64_to_cpu(dma[7]), + shadow[7]); + } + spin_lock_irqsave(&ipath_pioavail_lock, flags); + for (i = 0; i < piobregs; i++) { + u64 pchbusy, pchg, piov, pnew; + /* + * Chip Errata: bug 6641; even and odd qwords>3 are swapped + */ + if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) + piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]); + else + piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]); + pchg = dd->ipath_pioavailkernel[i] & + ~(dd->ipath_pioavailshadow[i] ^ piov); + pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT; + if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) { + pnew = dd->ipath_pioavailshadow[i] & ~pchbusy; + pnew |= piov & pchbusy; + dd->ipath_pioavailshadow[i] = pnew; + } + } + spin_unlock_irqrestore(&ipath_pioavail_lock, flags); +} + +/* + * used to force update of pioavailshadow if we can't get a pio buffer. + * Needed primarily due to exitting freeze mode after recovering + * from errors. Done lazily, because it's safer (known to not + * be writing pio buffers). + */ +static void ipath_reset_availshadow(struct ipath_devdata *dd) +{ + int i, im; + unsigned long flags; + + spin_lock_irqsave(&ipath_pioavail_lock, flags); + for (i = 0; i < dd->ipath_pioavregs; i++) { + u64 val, oldval; + /* deal with 6110 chip bug on high register #s */ + im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? + i ^ 1 : i; + val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]); + /* + * busy out the buffers not in the kernel avail list, + * without changing the generation bits. + */ + oldval = dd->ipath_pioavailshadow[i]; + dd->ipath_pioavailshadow[i] = val | + ((~dd->ipath_pioavailkernel[i] << + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) & + 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */ + if (oldval != dd->ipath_pioavailshadow[i]) + ipath_dbg("shadow[%d] was %Lx, now %lx\n", + i, (unsigned long long) oldval, + dd->ipath_pioavailshadow[i]); + } + spin_unlock_irqrestore(&ipath_pioavail_lock, flags); +} + +/** + * ipath_setrcvhdrsize - set the receive header size + * @dd: the infinipath device + * @rhdrsize: the receive header size + * + * called from user init code, and also layered driver init + */ +int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize) +{ + int ret = 0; + + if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) { + if (dd->ipath_rcvhdrsize != rhdrsize) { + dev_info(&dd->pcidev->dev, + "Error: can't set protocol header " + "size %u, already %u\n", + rhdrsize, dd->ipath_rcvhdrsize); + ret = -EAGAIN; + } else + ipath_cdbg(VERBOSE, "Reuse same protocol header " + "size %u\n", dd->ipath_rcvhdrsize); + } else if (rhdrsize > (dd->ipath_rcvhdrentsize - + (sizeof(u64) / sizeof(u32)))) { + ipath_dbg("Error: can't set protocol header size %u " + "(> max %u)\n", rhdrsize, + dd->ipath_rcvhdrentsize - + (u32) (sizeof(u64) / sizeof(u32))); + ret = -EOVERFLOW; + } else { + dd->ipath_flags |= IPATH_RCVHDRSZ_SET; + dd->ipath_rcvhdrsize = rhdrsize; + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize, + dd->ipath_rcvhdrsize); + ipath_cdbg(VERBOSE, "Set protocol header size to %u\n", + dd->ipath_rcvhdrsize); + } + return ret; +} + +/* + * debugging code and stats updates if no pio buffers available. + */ +static noinline void no_pio_bufs(struct ipath_devdata *dd) +{ + unsigned long *shadow = dd->ipath_pioavailshadow; + __le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma; + + dd->ipath_upd_pio_shadow = 1; + + /* + * not atomic, but if we lose a stat count in a while, that's OK + */ + ipath_stats.sps_nopiobufs++; + if (!(++dd->ipath_consec_nopiobuf % 100000)) { + ipath_force_pio_avail_update(dd); /* at start */ + ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: " + "%llx %llx %llx %llx\n" + "ipath shadow: %lx %lx %lx %lx\n", + dd->ipath_consec_nopiobuf, + (unsigned long)get_cycles(), + (unsigned long long) le64_to_cpu(dma[0]), + (unsigned long long) le64_to_cpu(dma[1]), + (unsigned long long) le64_to_cpu(dma[2]), + (unsigned long long) le64_to_cpu(dma[3]), + shadow[0], shadow[1], shadow[2], shadow[3]); + /* + * 4 buffers per byte, 4 registers above, cover rest + * below + */ + if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > + (sizeof(shadow[0]) * 4 * 4)) + ipath_dbg("2nd group: dmacopy: " + "%llx %llx %llx %llx\n" + "ipath shadow: %lx %lx %lx %lx\n", + (unsigned long long)le64_to_cpu(dma[4]), + (unsigned long long)le64_to_cpu(dma[5]), + (unsigned long long)le64_to_cpu(dma[6]), + (unsigned long long)le64_to_cpu(dma[7]), + shadow[4], shadow[5], shadow[6], shadow[7]); + + /* at end, so update likely happened */ + ipath_reset_availshadow(dd); + } +} + +/* + * common code for normal driver pio buffer allocation, and reserved + * allocation. + * + * do appropriate marking as busy, etc. + * returns buffer number if one found (>=0), negative number is error. + */ +static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd, + u32 *pbufnum, u32 first, u32 last, u32 firsti) +{ + int i, j, updated = 0; + unsigned piobcnt; + unsigned long flags; + unsigned long *shadow = dd->ipath_pioavailshadow; + u32 __iomem *buf; + + piobcnt = last - first; + if (dd->ipath_upd_pio_shadow) { + /* + * Minor optimization. If we had no buffers on last call, + * start out by doing the update; continue and do scan even + * if no buffers were updated, to be paranoid + */ + ipath_update_pio_bufs(dd); + updated++; + i = first; + } else + i = firsti; +rescan: + /* + * while test_and_set_bit() is atomic, we do that and then the + * change_bit(), and the pair is not. See if this is the cause + * of the remaining armlaunch errors. + */ + spin_lock_irqsave(&ipath_pioavail_lock, flags); + for (j = 0; j < piobcnt; j++, i++) { + if (i >= last) + i = first; + if (__test_and_set_bit((2 * i) + 1, shadow)) + continue; + /* flip generation bit */ + __change_bit(2 * i, shadow); + break; + } + spin_unlock_irqrestore(&ipath_pioavail_lock, flags); + + if (j == piobcnt) { + if (!updated) { + /* + * first time through; shadow exhausted, but may be + * buffers available, try an update and then rescan. + */ + ipath_update_pio_bufs(dd); + updated++; + i = first; + goto rescan; + } else if (updated == 1 && piobcnt <= + ((dd->ipath_sendctrl + >> INFINIPATH_S_UPDTHRESH_SHIFT) & + INFINIPATH_S_UPDTHRESH_MASK)) { + /* + * for chips supporting and using the update + * threshold we need to force an update of the + * in-memory copy if the count is less than the + * thershold, then check one more time. + */ + ipath_force_pio_avail_update(dd); + ipath_update_pio_bufs(dd); + updated++; + i = first; + goto rescan; + } + + no_pio_bufs(dd); + buf = NULL; + } else { + if (i < dd->ipath_piobcnt2k) + buf = (u32 __iomem *) (dd->ipath_pio2kbase + + i * dd->ipath_palign); + else + buf = (u32 __iomem *) + (dd->ipath_pio4kbase + + (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign); + if (pbufnum) + *pbufnum = i; + } + + return buf; +} + +/** + * ipath_getpiobuf - find an available pio buffer + * @dd: the infinipath device + * @plen: the size of the PIO buffer needed in 32-bit words + * @pbufnum: the buffer number is placed here + */ +u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum) +{ + u32 __iomem *buf; + u32 pnum, nbufs; + u32 first, lasti; + + if (plen + 1 >= IPATH_SMALLBUF_DWORDS) { + first = dd->ipath_piobcnt2k; + lasti = dd->ipath_lastpioindexl; + } else { + first = 0; + lasti = dd->ipath_lastpioindex; + } + nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; + buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti); + + if (buf) { + /* + * Set next starting place. It's just an optimization, + * it doesn't matter who wins on this, so no locking + */ + if (plen + 1 >= IPATH_SMALLBUF_DWORDS) + dd->ipath_lastpioindexl = pnum + 1; + else + dd->ipath_lastpioindex = pnum + 1; + if (dd->ipath_upd_pio_shadow) + dd->ipath_upd_pio_shadow = 0; + if (dd->ipath_consec_nopiobuf) + dd->ipath_consec_nopiobuf = 0; + ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n", + pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf); + if (pbufnum) + *pbufnum = pnum; + + } + return buf; +} + +/** + * ipath_chg_pioavailkernel - change which send buffers are available for kernel + * @dd: the infinipath device + * @start: the starting send buffer number + * @len: the number of send buffers + * @avail: true if the buffers are available for kernel use, false otherwise + */ +void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, + unsigned len, int avail) +{ + unsigned long flags; + unsigned end, cnt = 0; + + /* There are two bits per send buffer (busy and generation) */ + start *= 2; + end = start + len * 2; + + spin_lock_irqsave(&ipath_pioavail_lock, flags); + /* Set or clear the busy bit in the shadow. */ + while (start < end) { + if (avail) { + unsigned long dma; + int i, im; + /* + * the BUSY bit will never be set, because we disarm + * the user buffers before we hand them back to the + * kernel. We do have to make sure the generation + * bit is set correctly in shadow, since it could + * have changed many times while allocated to user. + * We can't use the bitmap functions on the full + * dma array because it is always little-endian, so + * we have to flip to host-order first. + * BITS_PER_LONG is slightly wrong, since it's + * always 64 bits per register in chip... + * We only work on 64 bit kernels, so that's OK. + */ + /* deal with 6110 chip bug on high register #s */ + i = start / BITS_PER_LONG; + im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? + i ^ 1 : i; + __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT + + start, dd->ipath_pioavailshadow); + dma = (unsigned long) le64_to_cpu( + dd->ipath_pioavailregs_dma[im]); + if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT + + start) % BITS_PER_LONG, &dma)) + __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT + + start, dd->ipath_pioavailshadow); + else + __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT + + start, dd->ipath_pioavailshadow); + __set_bit(start, dd->ipath_pioavailkernel); + } else { + __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT, + dd->ipath_pioavailshadow); + __clear_bit(start, dd->ipath_pioavailkernel); + } + start += 2; + } + + if (dd->ipath_pioupd_thresh) { + end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); + cnt = bitmap_weight(dd->ipath_pioavailkernel, end); + } + spin_unlock_irqrestore(&ipath_pioavail_lock, flags); + + /* + * When moving buffers from kernel to user, if number assigned to + * the user is less than the pio update threshold, and threshold + * is supported (cnt was computed > 0), drop the update threshold + * so we update at least once per allocated number of buffers. + * In any case, if the kernel buffers are less than the threshold, + * drop the threshold. We don't bother increasing it, having once + * decreased it, since it would typically just cycle back and forth. + * If we don't decrease below buffers in use, we can wait a long + * time for an update, until some other context uses PIO buffers. + */ + if (!avail && len < cnt) + cnt = len; + if (cnt < dd->ipath_pioupd_thresh) { + dd->ipath_pioupd_thresh = cnt; + ipath_dbg("Decreased pio update threshold to %u\n", + dd->ipath_pioupd_thresh); + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK + << INFINIPATH_S_UPDTHRESH_SHIFT); + dd->ipath_sendctrl |= dd->ipath_pioupd_thresh + << INFINIPATH_S_UPDTHRESH_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + } +} + +/** + * ipath_create_rcvhdrq - create a receive header queue + * @dd: the infinipath device + * @pd: the port data + * + * this must be contiguous memory (from an i/o perspective), and must be + * DMA'able (which means for some systems, it will go through an IOMMU, + * or be forced into a low address range). + */ +int ipath_create_rcvhdrq(struct ipath_devdata *dd, + struct ipath_portdata *pd) +{ + int ret = 0; + + if (!pd->port_rcvhdrq) { + dma_addr_t phys_hdrqtail; + gfp_t gfp_flags = GFP_USER | __GFP_COMP; + int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * + sizeof(u32), PAGE_SIZE); + + pd->port_rcvhdrq = dma_alloc_coherent( + &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys, + gfp_flags); + + if (!pd->port_rcvhdrq) { + ipath_dev_err(dd, "attempt to allocate %d bytes " + "for port %u rcvhdrq failed\n", + amt, pd->port_port); + ret = -ENOMEM; + goto bail; + } + + if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) { + pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent( + &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, + GFP_KERNEL); + if (!pd->port_rcvhdrtail_kvaddr) { + ipath_dev_err(dd, "attempt to allocate 1 page " + "for port %u rcvhdrqtailaddr " + "failed\n", pd->port_port); + ret = -ENOMEM; + dma_free_coherent(&dd->pcidev->dev, amt, + pd->port_rcvhdrq, + pd->port_rcvhdrq_phys); + pd->port_rcvhdrq = NULL; + goto bail; + } + pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail; + ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx " + "physical\n", pd->port_port, + (unsigned long long) phys_hdrqtail); + } + + pd->port_rcvhdrq_size = amt; + + ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu " + "for port %u rcvhdr Q\n", + amt >> PAGE_SHIFT, pd->port_rcvhdrq, + (unsigned long) pd->port_rcvhdrq_phys, + (unsigned long) pd->port_rcvhdrq_size, + pd->port_port); + } + else + ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; " + "hdrtailaddr@%p %llx physical\n", + pd->port_port, pd->port_rcvhdrq, + (unsigned long long) pd->port_rcvhdrq_phys, + pd->port_rcvhdrtail_kvaddr, (unsigned long long) + pd->port_rcvhdrqtailaddr_phys); + + /* clear for security and sanity on each use */ + memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size); + if (pd->port_rcvhdrtail_kvaddr) + memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); + + /* + * tell chip each time we init it, even if we are re-using previous + * memory (we zero the register at process close) + */ + ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr, + pd->port_port, pd->port_rcvhdrqtailaddr_phys); + ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, + pd->port_port, pd->port_rcvhdrq_phys); + +bail: + return ret; +} + + +/* + * Flush all sends that might be in the ready to send state, as well as any + * that are in the process of being sent. Used whenever we need to be + * sure the send side is idle. Cleans up all buffer state by canceling + * all pio buffers, and issuing an abort, which cleans up anything in the + * launch fifo. The cancel is superfluous on some chip versions, but + * it's safer to always do it. + * PIOAvail bits are updated by the chip as if normal send had happened. + */ +void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) +{ + unsigned long flags; + + if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) { + ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n"); + goto bail; + } + /* + * If we have SDMA, and it's not disabled, we have to kick off the + * abort state machine, provided we aren't already aborting. + * If we are in the process of aborting SDMA (!DISABLED, but ABORTING), + * we skip the rest of this routine. It is already "in progress" + */ + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) { + int skip_cancel; + unsigned long *statp = &dd->ipath_sdma_status; + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + skip_cancel = + test_and_set_bit(IPATH_SDMA_ABORTING, statp) + && !test_bit(IPATH_SDMA_DISABLED, statp); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + if (skip_cancel) + goto bail; + } + + ipath_dbg("Cancelling all in-progress send buffers\n"); + + /* skip armlaunch errs for a while */ + dd->ipath_lastcancel = jiffies + HZ / 2; + + /* + * The abort bit is auto-clearing. We also don't want pioavail + * update happening during this, and we don't want any other + * sends going out, so turn those off for the duration. We read + * the scratch register to be sure that cancels and the abort + * have taken effect in the chip. Otherwise two parts are same + * as ipath_force_pio_avail_update() + */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD + | INFINIPATH_S_PIOENABLE); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl | INFINIPATH_S_ABORT); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + + /* disarm all send buffers */ + ipath_disarm_piobufs(dd, 0, + dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); + + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); + + if (restore_sendctrl) { + /* else done by caller later if needed */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD | + INFINIPATH_S_PIOENABLE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + /* and again, be sure all have hit the chip */ + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + } + + if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) && + !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) && + test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) { + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + /* only wait so long for intr */ + dd->ipath_sdma_abort_intr_timeout = jiffies + HZ; + dd->ipath_sdma_reset_wait = 200; + if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + tasklet_hi_schedule(&dd->ipath_sdma_abort_task); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + } +bail:; +} + +/* + * Force an update of in-memory copy of the pioavail registers, when + * needed for any of a variety of reasons. We read the scratch register + * to make it highly likely that the update will have happened by the + * time we return. If already off (as in cancel_sends above), this + * routine is a nop, on the assumption that the caller will "do the + * right thing". + */ +void ipath_force_pio_avail_update(struct ipath_devdata *dd) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) { + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + } + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); +} + +static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd, + int linitcmd) +{ + u64 mod_wd; + static const char *what[4] = { + [0] = "NOP", + [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN", + [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED", + [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE" + }; + + if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) { + /* + * If we are told to disable, note that so link-recovery + * code does not attempt to bring us back up. + */ + preempt_disable(); + dd->ipath_flags |= IPATH_IB_LINK_DISABLED; + preempt_enable(); + } else if (linitcmd) { + /* + * Any other linkinitcmd will lead to LINKDOWN and then + * to INIT (if all is well), so clear flag to let + * link-recovery code attempt to bring us back up. + */ + preempt_disable(); + dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED; + preempt_enable(); + } + + mod_wd = (linkcmd << dd->ibcc_lc_shift) | + (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT); + ipath_cdbg(VERBOSE, + "Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n", + dd->ipath_unit, what[linkcmd], linitcmd, + ipath_ibcstatus_str[ipath_ib_linktrstate(dd, + ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]); + + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl | mod_wd); + /* read from chip so write is flushed */ + (void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); +} + +int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) +{ + u32 lstate; + int ret; + + switch (newstate) { + case IPATH_IB_LINKDOWN_ONLY: + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0); + /* don't wait */ + ret = 0; + goto bail; + + case IPATH_IB_LINKDOWN: + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, + INFINIPATH_IBCC_LINKINITCMD_POLL); + /* don't wait */ + ret = 0; + goto bail; + + case IPATH_IB_LINKDOWN_SLEEP: + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, + INFINIPATH_IBCC_LINKINITCMD_SLEEP); + /* don't wait */ + ret = 0; + goto bail; + + case IPATH_IB_LINKDOWN_DISABLE: + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, + INFINIPATH_IBCC_LINKINITCMD_DISABLE); + /* don't wait */ + ret = 0; + goto bail; + + case IPATH_IB_LINKARM: + if (dd->ipath_flags & IPATH_LINKARMED) { + ret = 0; + goto bail; + } + if (!(dd->ipath_flags & + (IPATH_LINKINIT | IPATH_LINKACTIVE))) { + ret = -EINVAL; + goto bail; + } + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0); + + /* + * Since the port can transition to ACTIVE by receiving + * a non VL 15 packet, wait for either state. + */ + lstate = IPATH_LINKARMED | IPATH_LINKACTIVE; + break; + + case IPATH_IB_LINKACTIVE: + if (dd->ipath_flags & IPATH_LINKACTIVE) { + ret = 0; + goto bail; + } + if (!(dd->ipath_flags & IPATH_LINKARMED)) { + ret = -EINVAL; + goto bail; + } + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0); + lstate = IPATH_LINKACTIVE; + break; + + case IPATH_IB_LINK_LOOPBACK: + dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n"); + dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + + /* turn heartbeat off, as it causes loopback to fail */ + dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, + IPATH_IB_HRTBT_OFF); + /* don't wait */ + ret = 0; + goto bail; + + case IPATH_IB_LINK_EXTERNAL: + dev_info(&dd->pcidev->dev, + "Disabling IB local loopback (normal)\n"); + dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, + IPATH_IB_HRTBT_ON); + dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + /* don't wait */ + ret = 0; + goto bail; + + /* + * Heartbeat can be explicitly enabled by the user via + * "hrtbt_enable" "file", and if disabled, trying to enable here + * will have no effect. Implicit changes (heartbeat off when + * loopback on, and vice versa) are included to ease testing. + */ + case IPATH_IB_LINK_HRTBT: + ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, + IPATH_IB_HRTBT_ON); + goto bail; + + case IPATH_IB_LINK_NO_HRTBT: + ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, + IPATH_IB_HRTBT_OFF); + goto bail; + + default: + ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); + ret = -EINVAL; + goto bail; + } + ret = ipath_wait_linkstate(dd, lstate, 2000); + +bail: + return ret; +} + +/** + * ipath_set_mtu - set the MTU + * @dd: the infinipath device + * @arg: the new MTU + * + * we can handle "any" incoming size, the issue here is whether we + * need to restrict our outgoing size. For now, we don't do any + * sanity checking on this, and we don't deal with what happens to + * programs that are already running when the size changes. + * NOTE: changing the MTU will usually cause the IBC to go back to + * link INIT state... + */ +int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) +{ + u32 piosize; + int changed = 0; + int ret; + + /* + * mtu is IB data payload max. It's the largest power of 2 less + * than piosize (or even larger, since it only really controls the + * largest we can receive; we can send the max of the mtu and + * piosize). We check that it's one of the valid IB sizes. + */ + if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && + (arg != 4096 || !ipath_mtu4096)) { + ipath_dbg("Trying to set invalid mtu %u, failing\n", arg); + ret = -EINVAL; + goto bail; + } + if (dd->ipath_ibmtu == arg) { + ret = 0; /* same as current */ + goto bail; + } + + piosize = dd->ipath_ibmaxlen; + dd->ipath_ibmtu = arg; + + if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) { + /* Only if it's not the initial value (or reset to it) */ + if (piosize != dd->ipath_init_ibmaxlen) { + if (arg > piosize && arg <= dd->ipath_init_ibmaxlen) + piosize = dd->ipath_init_ibmaxlen; + dd->ipath_ibmaxlen = piosize; + changed = 1; + } + } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) { + piosize = arg + IPATH_PIO_MAXIBHDR; + ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x " + "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize, + arg); + dd->ipath_ibmaxlen = piosize; + changed = 1; + } + + if (changed) { + u64 ibc = dd->ipath_ibcctrl, ibdw; + /* + * update our housekeeping variables, and set IBC max + * size, same as init code; max IBC is max we allow in + * buffer, less the qword pbc, plus 1 for ICRC, in dwords + */ + dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32); + ibdw = (dd->ipath_ibmaxlen >> 2) + 1; + ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK << + dd->ibcc_mpl_shift); + ibc |= ibdw << dd->ibcc_mpl_shift; + dd->ipath_ibcctrl = ibc; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + dd->ipath_f_tidtemplate(dd); + } + + ret = 0; + +bail: + return ret; +} + +int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc) +{ + dd->ipath_lid = lid; + dd->ipath_lmc = lmc; + + dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid | + (~((1U << lmc) - 1)) << 16); + + dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid); + + return 0; +} + + +/** + * ipath_write_kreg_port - write a device's per-port 64-bit kernel register + * @dd: the infinipath device + * @regno: the register number to write + * @port: the port containing the register + * @value: the value to write + * + * Registers that vary with the chip implementation constants (port) + * use this routine. + */ +void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno, + unsigned port, u64 value) +{ + u16 where; + + if (port < dd->ipath_portcnt && + (regno == dd->ipath_kregs->kr_rcvhdraddr || + regno == dd->ipath_kregs->kr_rcvhdrtailaddr)) + where = regno + port; + else + where = -1; + + ipath_write_kreg(dd, where, value); +} + +/* + * Following deal with the "obviously simple" task of overriding the state + * of the LEDS, which normally indicate link physical and logical status. + * The complications arise in dealing with different hardware mappings + * and the board-dependent routine being called from interrupts. + * and then there's the requirement to _flash_ them. + */ +#define LED_OVER_FREQ_SHIFT 8 +#define LED_OVER_FREQ_MASK (0xFF<ipath_flags & IPATH_INITTED)) + return; + + pidx = dd->ipath_led_override_phase++ & 1; + dd->ipath_led_override = dd->ipath_led_override_vals[pidx]; + timeoff = dd->ipath_led_override_timeoff; + + /* + * below potentially restores the LED values per current status, + * should also possibly setup the traffic-blink register, + * but leave that to per-chip functions. + */ + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); + ltstate = ipath_ib_linktrstate(dd, val); + lstate = ipath_ib_linkstate(dd, val); + + dd->ipath_f_setextled(dd, lstate, ltstate); + mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff); +} + +void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val) +{ + int timeoff, freq; + + if (!(dd->ipath_flags & IPATH_INITTED)) + return; + + /* First check if we are blinking. If not, use 1HZ polling */ + timeoff = HZ; + freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT; + + if (freq) { + /* For blink, set each phase from one nybble of val */ + dd->ipath_led_override_vals[0] = val & 0xF; + dd->ipath_led_override_vals[1] = (val >> 4) & 0xF; + timeoff = (HZ << 4)/freq; + } else { + /* Non-blink set both phases the same. */ + dd->ipath_led_override_vals[0] = val & 0xF; + dd->ipath_led_override_vals[1] = val & 0xF; + } + dd->ipath_led_override_timeoff = timeoff; + + /* + * If the timer has not already been started, do so. Use a "quick" + * timeout so the function will be called soon, to look at our request. + */ + if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) { + /* Need to start timer */ + init_timer(&dd->ipath_led_override_timer); + dd->ipath_led_override_timer.function = + ipath_run_led_override; + dd->ipath_led_override_timer.data = (unsigned long) dd; + dd->ipath_led_override_timer.expires = jiffies + 1; + add_timer(&dd->ipath_led_override_timer); + } else + atomic_dec(&dd->ipath_led_override_timer_active); +} + +/** + * ipath_shutdown_device - shut down a device + * @dd: the infinipath device + * + * This is called to make the device quiet when we are about to + * unload the driver, and also when the device is administratively + * disabled. It does not free any data structures. + * Everything it does has to be setup again by ipath_init_chip(dd,1) + */ +void ipath_shutdown_device(struct ipath_devdata *dd) +{ + unsigned long flags; + + ipath_dbg("Shutting down the device\n"); + + ipath_hol_up(dd); /* make sure user processes aren't suspended */ + + dd->ipath_flags |= IPATH_LINKUNK; + dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN | + IPATH_LINKINIT | IPATH_LINKARMED | + IPATH_LINKACTIVE); + *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF | + IPATH_STATUS_IB_READY); + + /* mask interrupts, but not errors */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); + + dd->ipath_rcvctrl = 0; + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl); + + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + teardown_sdma(dd); + + /* + * gracefully stop all sends allowing any in progress to trickle out + * first. + */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl = 0; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + /* flush it */ + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + + /* + * enough for anything that's going to trickle out to have actually + * done so. + */ + udelay(5); + + dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */ + + ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE); + ipath_cancel_sends(dd, 0); + + /* + * we are shutting down, so tell components that care. We don't do + * this on just a link state change, much like ethernet, a cable + * unplug, etc. doesn't change driver state + */ + signal_ib_event(dd, IB_EVENT_PORT_ERR); + + /* disable IBC */ + dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control | INFINIPATH_C_FREEZEMODE); + + /* + * clear SerdesEnable and turn the leds off; do this here because + * we are unloading, so don't count on interrupts to move along + * Turn the LEDs off explicitly for the same reason. + */ + dd->ipath_f_quiet_serdes(dd); + + /* stop all the timers that might still be running */ + del_timer_sync(&dd->ipath_hol_timer); + if (dd->ipath_stats_timer_active) { + del_timer_sync(&dd->ipath_stats_timer); + dd->ipath_stats_timer_active = 0; + } + if (dd->ipath_intrchk_timer.data) { + del_timer_sync(&dd->ipath_intrchk_timer); + dd->ipath_intrchk_timer.data = 0; + } + if (atomic_read(&dd->ipath_led_override_timer_active)) { + del_timer_sync(&dd->ipath_led_override_timer); + atomic_set(&dd->ipath_led_override_timer_active, 0); + } + + /* + * clear all interrupts and errors, so that the next time the driver + * is loaded or device is enabled, we know that whatever is set + * happened while we were unloaded + */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, + ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED); + ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); + ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); + + ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n"); + ipath_update_eeprom_log(dd); +} + +/** + * ipath_free_pddata - free a port's allocated data + * @dd: the infinipath device + * @pd: the portdata structure + * + * free up any allocated data for a port + * This should not touch anything that would affect a simultaneous + * re-allocation of port data, because it is called after ipath_mutex + * is released (and can be called from reinit as well). + * It should never change any chip state, or global driver state. + * (The only exception to global state is freeing the port0 port0_skbs.) + */ +void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd) +{ + if (!pd) + return; + + if (pd->port_rcvhdrq) { + ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p " + "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq, + (unsigned long) pd->port_rcvhdrq_size); + dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size, + pd->port_rcvhdrq, pd->port_rcvhdrq_phys); + pd->port_rcvhdrq = NULL; + if (pd->port_rcvhdrtail_kvaddr) { + dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, + pd->port_rcvhdrtail_kvaddr, + pd->port_rcvhdrqtailaddr_phys); + pd->port_rcvhdrtail_kvaddr = NULL; + } + } + if (pd->port_port && pd->port_rcvegrbuf) { + unsigned e; + + for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) { + void *base = pd->port_rcvegrbuf[e]; + size_t size = pd->port_rcvegrbuf_size; + + ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), " + "chunk %u/%u\n", base, + (unsigned long) size, + e, pd->port_rcvegrbuf_chunks); + dma_free_coherent(&dd->pcidev->dev, size, + base, pd->port_rcvegrbuf_phys[e]); + } + kfree(pd->port_rcvegrbuf); + pd->port_rcvegrbuf = NULL; + kfree(pd->port_rcvegrbuf_phys); + pd->port_rcvegrbuf_phys = NULL; + pd->port_rcvegrbuf_chunks = 0; + } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) { + unsigned e; + struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo; + + dd->ipath_port0_skbinfo = NULL; + ipath_cdbg(VERBOSE, "free closed port %d " + "ipath_port0_skbinfo @ %p\n", pd->port_port, + skbinfo); + for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++) + if (skbinfo[e].skb) { + pci_unmap_single(dd->pcidev, skbinfo[e].phys, + dd->ipath_ibmaxlen, + PCI_DMA_FROMDEVICE); + dev_kfree_skb(skbinfo[e].skb); + } + vfree(skbinfo); + } + kfree(pd->port_tid_pg_list); + vfree(pd->subport_uregbase); + vfree(pd->subport_rcvegrbuf); + vfree(pd->subport_rcvhdr_base); + kfree(pd); +} + +static int __init infinipath_init(void) +{ + int ret; + + if (ipath_debug & __IPATH_DBG) + printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); + + /* + * These must be called before the driver is registered with + * the PCI subsystem. + */ + idr_init(&unit_table); + if (!idr_pre_get(&unit_table, GFP_KERNEL)) { + printk(KERN_ERR IPATH_DRV_NAME ": idr_pre_get() failed\n"); + ret = -ENOMEM; + goto bail; + } + + ret = pci_register_driver(&ipath_driver); + if (ret < 0) { + printk(KERN_ERR IPATH_DRV_NAME + ": Unable to register driver: error %d\n", -ret); + goto bail_unit; + } + + ret = ipath_init_ipathfs(); + if (ret < 0) { + printk(KERN_ERR IPATH_DRV_NAME ": Unable to create " + "ipathfs: error %d\n", -ret); + goto bail_pci; + } + + goto bail; + +bail_pci: + pci_unregister_driver(&ipath_driver); + +bail_unit: + idr_destroy(&unit_table); + +bail: + return ret; +} + +static void __exit infinipath_cleanup(void) +{ + ipath_exit_ipathfs(); + + ipath_cdbg(VERBOSE, "Unregistering pci driver\n"); + pci_unregister_driver(&ipath_driver); + + idr_destroy(&unit_table); +} + +/** + * ipath_reset_device - reset the chip if possible + * @unit: the device to reset + * + * Whether or not reset is successful, we attempt to re-initialize the chip + * (that is, much like a driver unload/reload). We clear the INITTED flag + * so that the various entry points will fail until we reinitialize. For + * now, we only allow this if no user ports are open that use chip resources + */ +int ipath_reset_device(int unit) +{ + int ret, i; + struct ipath_devdata *dd = ipath_lookup(unit); + unsigned long flags; + + if (!dd) { + ret = -ENODEV; + goto bail; + } + + if (atomic_read(&dd->ipath_led_override_timer_active)) { + /* Need to stop LED timer, _then_ shut off LEDs */ + del_timer_sync(&dd->ipath_led_override_timer); + atomic_set(&dd->ipath_led_override_timer_active, 0); + } + + /* Shut off LEDs after we are sure timer is not running */ + dd->ipath_led_override = LED_OVER_BOTH_OFF; + dd->ipath_f_setextled(dd, 0, 0); + + dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit); + + if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) { + dev_info(&dd->pcidev->dev, "Invalid unit number %u or " + "not initialized or not present\n", unit); + ret = -ENXIO; + goto bail; + } + + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); + if (dd->ipath_pd) + for (i = 1; i < dd->ipath_cfgports; i++) { + if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) + continue; + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); + ipath_dbg("unit %u port %d is in use " + "(PID %u cmd %s), can't reset\n", + unit, i, + pid_nr(dd->ipath_pd[i]->port_pid), + dd->ipath_pd[i]->port_comm); + ret = -EBUSY; + goto bail; + } + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); + + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + teardown_sdma(dd); + + dd->ipath_flags &= ~IPATH_INITTED; + ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); + ret = dd->ipath_f_reset(dd); + if (ret == 1) { + ipath_dbg("Reinitializing unit %u after reset attempt\n", + unit); + ret = ipath_init_chip(dd, 1); + } else + ret = -EAGAIN; + if (ret) + ipath_dev_err(dd, "Reinitialize unit %u after " + "reset failed with %d\n", unit, ret); + else + dev_info(&dd->pcidev->dev, "Reinitialized unit %u after " + "resetting\n", unit); + +bail: + return ret; +} + +/* + * send a signal to all the processes that have the driver open + * through the normal interfaces (i.e., everything other than diags + * interface). Returns number of signalled processes. + */ +static int ipath_signal_procs(struct ipath_devdata *dd, int sig) +{ + int i, sub, any = 0; + struct pid *pid; + unsigned long flags; + + if (!dd->ipath_pd) + return 0; + + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); + for (i = 1; i < dd->ipath_cfgports; i++) { + if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) + continue; + pid = dd->ipath_pd[i]->port_pid; + if (!pid) + continue; + + dev_info(&dd->pcidev->dev, "context %d in use " + "(PID %u), sending signal %d\n", + i, pid_nr(pid), sig); + kill_pid(pid, sig, 1); + any++; + for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) { + pid = dd->ipath_pd[i]->port_subpid[sub]; + if (!pid) + continue; + dev_info(&dd->pcidev->dev, "sub-context " + "%d:%d in use (PID %u), sending " + "signal %d\n", i, sub, pid_nr(pid), sig); + kill_pid(pid, sig, 1); + any++; + } + } + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); + return any; +} + +static void ipath_hol_signal_down(struct ipath_devdata *dd) +{ + if (ipath_signal_procs(dd, SIGSTOP)) + ipath_dbg("Stopped some processes\n"); + ipath_cancel_sends(dd, 1); +} + + +static void ipath_hol_signal_up(struct ipath_devdata *dd) +{ + if (ipath_signal_procs(dd, SIGCONT)) + ipath_dbg("Continued some processes\n"); +} + +/* + * link is down, stop any users processes, and flush pending sends + * to prevent HoL blocking, then start the HoL timer that + * periodically continues, then stop procs, so they can detect + * link down if they want, and do something about it. + * Timer may already be running, so use mod_timer, not add_timer. + */ +void ipath_hol_down(struct ipath_devdata *dd) +{ + dd->ipath_hol_state = IPATH_HOL_DOWN; + ipath_hol_signal_down(dd); + dd->ipath_hol_next = IPATH_HOL_DOWNCONT; + dd->ipath_hol_timer.expires = jiffies + + msecs_to_jiffies(ipath_hol_timeout_ms); + mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); +} + +/* + * link is up, continue any user processes, and ensure timer + * is a nop, if running. Let timer keep running, if set; it + * will nop when it sees the link is up + */ +void ipath_hol_up(struct ipath_devdata *dd) +{ + ipath_hol_signal_up(dd); + dd->ipath_hol_state = IPATH_HOL_UP; +} + +/* + * toggle the running/not running state of user proceses + * to prevent HoL blocking on chip resources, but still allow + * user processes to do link down special case handling. + * Should only be called via the timer + */ +void ipath_hol_event(unsigned long opaque) +{ + struct ipath_devdata *dd = (struct ipath_devdata *)opaque; + + if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP + && dd->ipath_hol_state != IPATH_HOL_UP) { + dd->ipath_hol_next = IPATH_HOL_DOWNCONT; + ipath_dbg("Stopping processes\n"); + ipath_hol_signal_down(dd); + } else { /* may do "extra" if also in ipath_hol_up() */ + dd->ipath_hol_next = IPATH_HOL_DOWNSTOP; + ipath_dbg("Continuing processes\n"); + ipath_hol_signal_up(dd); + } + if (dd->ipath_hol_state == IPATH_HOL_UP) + ipath_dbg("link's up, don't resched timer\n"); + else { + dd->ipath_hol_timer.expires = jiffies + + msecs_to_jiffies(ipath_hol_timeout_ms); + mod_timer(&dd->ipath_hol_timer, + dd->ipath_hol_timer.expires); + } +} + +int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv) +{ + u64 val; + + if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK) + return -1; + if (dd->ipath_rx_pol_inv != new_pol_inv) { + dd->ipath_rx_pol_inv = new_pol_inv; + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); + val &= ~(INFINIPATH_XGXS_RX_POL_MASK << + INFINIPATH_XGXS_RX_POL_SHIFT); + val |= ((u64)dd->ipath_rx_pol_inv) << + INFINIPATH_XGXS_RX_POL_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); + } + return 0; +} + +/* + * Disable and enable the armlaunch error. Used for PIO bandwidth testing on + * the 7220, which is count-based, rather than trigger-based. Safe for the + * driver check, since it's at init. Not completely safe when used for + * user-mode checking, since some error checking can be lost, but not + * particularly risky, and only has problematic side-effects in the face of + * very buggy user code. There is no reference counting, but that's also + * fine, given the intended use. + */ +void ipath_enable_armlaunch(struct ipath_devdata *dd) +{ + dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH; + ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, + INFINIPATH_E_SPIOARMLAUNCH); + dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH; + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, + dd->ipath_errormask); +} + +void ipath_disable_armlaunch(struct ipath_devdata *dd) +{ + /* so don't re-enable if already set */ + dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH; + dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH; + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, + dd->ipath_errormask); +} + +module_init(infinipath_init); +module_exit(infinipath_cleanup); diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c new file mode 100644 index 00000000..fc718198 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -0,0 +1,1183 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "ipath_kernel.h" + +/* + * InfiniPath I2C driver for a serial eeprom. This is not a generic + * I2C interface. For a start, the device we're using (Atmel AT24C11) + * doesn't work like a regular I2C device. It looks like one + * electrically, but not logically. Normal I2C devices have a single + * 7-bit or 10-bit I2C address that they respond to. Valid 7-bit + * addresses range from 0x03 to 0x77. Addresses 0x00 to 0x02 and 0x78 + * to 0x7F are special reserved addresses (e.g. 0x00 is the "general + * call" address.) The Atmel device, on the other hand, responds to ALL + * 7-bit addresses. It's designed to be the only device on a given I2C + * bus. A 7-bit address corresponds to the memory address within the + * Atmel device itself. + * + * Also, the timing requirements mean more than simple software + * bitbanging, with readbacks from chip to ensure timing (simple udelay + * is not enough). + * + * This all means that accessing the device is specialized enough + * that using the standard kernel I2C bitbanging interface would be + * impossible. For example, the core I2C eeprom driver expects to find + * a device at one or more of a limited set of addresses only. It doesn't + * allow writing to an eeprom. It also doesn't provide any means of + * accessing eeprom contents from within the kernel, only via sysfs. + */ + +/* Added functionality for IBA7220-based cards */ +#define IPATH_EEPROM_DEV_V1 0xA0 +#define IPATH_EEPROM_DEV_V2 0xA2 +#define IPATH_TEMP_DEV 0x98 +#define IPATH_BAD_DEV (IPATH_EEPROM_DEV_V2+2) +#define IPATH_NO_DEV (0xFF) + +/* + * The number of I2C chains is proliferating. Table below brings + * some order to the madness. The basic principle is that the + * table is scanned from the top, and a "probe" is made to the + * device probe_dev. If that succeeds, the chain is considered + * to be of that type, and dd->i2c_chain_type is set to the index+1 + * of the entry. + * The +1 is so static initialization can mean "unknown, do probe." + */ +static struct i2c_chain_desc { + u8 probe_dev; /* If seen at probe, chain is this type */ + u8 eeprom_dev; /* Dev addr (if any) for EEPROM */ + u8 temp_dev; /* Dev Addr (if any) for Temp-sense */ +} i2c_chains[] = { + { IPATH_BAD_DEV, IPATH_NO_DEV, IPATH_NO_DEV }, /* pre-iba7220 bds */ + { IPATH_EEPROM_DEV_V1, IPATH_EEPROM_DEV_V1, IPATH_TEMP_DEV}, /* V1 */ + { IPATH_EEPROM_DEV_V2, IPATH_EEPROM_DEV_V2, IPATH_TEMP_DEV}, /* V2 */ + { IPATH_NO_DEV } +}; + +enum i2c_type { + i2c_line_scl = 0, + i2c_line_sda +}; + +enum i2c_state { + i2c_line_low = 0, + i2c_line_high +}; + +#define READ_CMD 1 +#define WRITE_CMD 0 + +/** + * i2c_gpio_set - set a GPIO line + * @dd: the infinipath device + * @line: the line to set + * @new_line_state: the state to set + * + * Returns 0 if the line was set to the new state successfully, non-zero + * on error. + */ +static int i2c_gpio_set(struct ipath_devdata *dd, + enum i2c_type line, + enum i2c_state new_line_state) +{ + u64 out_mask, dir_mask, *gpioval; + unsigned long flags = 0; + + gpioval = &dd->ipath_gpio_out; + + if (line == i2c_line_scl) { + dir_mask = dd->ipath_gpio_scl; + out_mask = (1UL << dd->ipath_gpio_scl_num); + } else { + dir_mask = dd->ipath_gpio_sda; + out_mask = (1UL << dd->ipath_gpio_sda_num); + } + + spin_lock_irqsave(&dd->ipath_gpio_lock, flags); + if (new_line_state == i2c_line_high) { + /* tri-state the output rather than force high */ + dd->ipath_extctrl &= ~dir_mask; + } else { + /* config line to be an output */ + dd->ipath_extctrl |= dir_mask; + } + ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl); + + /* set output as well (no real verify) */ + if (new_line_state == i2c_line_high) + *gpioval |= out_mask; + else + *gpioval &= ~out_mask; + + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval); + spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); + + return 0; +} + +/** + * i2c_gpio_get - get a GPIO line state + * @dd: the infinipath device + * @line: the line to get + * @curr_statep: where to put the line state + * + * Returns 0 if the line was set to the new state successfully, non-zero + * on error. curr_state is not set on error. + */ +static int i2c_gpio_get(struct ipath_devdata *dd, + enum i2c_type line, + enum i2c_state *curr_statep) +{ + u64 read_val, mask; + int ret; + unsigned long flags = 0; + + /* check args */ + if (curr_statep == NULL) { + ret = 1; + goto bail; + } + + /* config line to be an input */ + if (line == i2c_line_scl) + mask = dd->ipath_gpio_scl; + else + mask = dd->ipath_gpio_sda; + + spin_lock_irqsave(&dd->ipath_gpio_lock, flags); + dd->ipath_extctrl &= ~mask; + ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl); + /* + * Below is very unlikely to reflect true input state if Output + * Enable actually changed. + */ + read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); + spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); + + if (read_val & mask) + *curr_statep = i2c_line_high; + else + *curr_statep = i2c_line_low; + + ret = 0; + +bail: + return ret; +} + +/** + * i2c_wait_for_writes - wait for a write + * @dd: the infinipath device + * + * We use this instead of udelay directly, so we can make sure + * that previous register writes have been flushed all the way + * to the chip. Since we are delaying anyway, the cost doesn't + * hurt, and makes the bit twiddling more regular + */ +static void i2c_wait_for_writes(struct ipath_devdata *dd) +{ + (void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + rmb(); +} + +static void scl_out(struct ipath_devdata *dd, u8 bit) +{ + udelay(1); + i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low); + + i2c_wait_for_writes(dd); +} + +static void sda_out(struct ipath_devdata *dd, u8 bit) +{ + i2c_gpio_set(dd, i2c_line_sda, bit ? i2c_line_high : i2c_line_low); + + i2c_wait_for_writes(dd); +} + +static u8 sda_in(struct ipath_devdata *dd, int wait) +{ + enum i2c_state bit; + + if (i2c_gpio_get(dd, i2c_line_sda, &bit)) + ipath_dbg("get bit failed!\n"); + + if (wait) + i2c_wait_for_writes(dd); + + return bit == i2c_line_high ? 1U : 0; +} + +/** + * i2c_ackrcv - see if ack following write is true + * @dd: the infinipath device + */ +static int i2c_ackrcv(struct ipath_devdata *dd) +{ + u8 ack_received; + + /* AT ENTRY SCL = LOW */ + /* change direction, ignore data */ + ack_received = sda_in(dd, 1); + scl_out(dd, i2c_line_high); + ack_received = sda_in(dd, 1) == 0; + scl_out(dd, i2c_line_low); + return ack_received; +} + +/** + * rd_byte - read a byte, leaving ACK, STOP, etc up to caller + * @dd: the infinipath device + * + * Returns byte shifted out of device + */ +static int rd_byte(struct ipath_devdata *dd) +{ + int bit_cntr, data; + + data = 0; + + for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) { + data <<= 1; + scl_out(dd, i2c_line_high); + data |= sda_in(dd, 0); + scl_out(dd, i2c_line_low); + } + return data; +} + +/** + * wr_byte - write a byte, one bit at a time + * @dd: the infinipath device + * @data: the byte to write + * + * Returns 0 if we got the following ack, otherwise 1 + */ +static int wr_byte(struct ipath_devdata *dd, u8 data) +{ + int bit_cntr; + u8 bit; + + for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) { + bit = (data >> bit_cntr) & 1; + sda_out(dd, bit); + scl_out(dd, i2c_line_high); + scl_out(dd, i2c_line_low); + } + return (!i2c_ackrcv(dd)) ? 1 : 0; +} + +static void send_ack(struct ipath_devdata *dd) +{ + sda_out(dd, i2c_line_low); + scl_out(dd, i2c_line_high); + scl_out(dd, i2c_line_low); + sda_out(dd, i2c_line_high); +} + +/** + * i2c_startcmd - transmit the start condition, followed by address/cmd + * @dd: the infinipath device + * @offset_dir: direction byte + * + * (both clock/data high, clock high, data low while clock is high) + */ +static int i2c_startcmd(struct ipath_devdata *dd, u8 offset_dir) +{ + int res; + + /* issue start sequence */ + sda_out(dd, i2c_line_high); + scl_out(dd, i2c_line_high); + sda_out(dd, i2c_line_low); + scl_out(dd, i2c_line_low); + + /* issue length and direction byte */ + res = wr_byte(dd, offset_dir); + + if (res) + ipath_cdbg(VERBOSE, "No ack to complete start\n"); + + return res; +} + +/** + * stop_cmd - transmit the stop condition + * @dd: the infinipath device + * + * (both clock/data low, clock high, data high while clock is high) + */ +static void stop_cmd(struct ipath_devdata *dd) +{ + scl_out(dd, i2c_line_low); + sda_out(dd, i2c_line_low); + scl_out(dd, i2c_line_high); + sda_out(dd, i2c_line_high); + udelay(2); +} + +/** + * eeprom_reset - reset I2C communication + * @dd: the infinipath device + */ + +static int eeprom_reset(struct ipath_devdata *dd) +{ + int clock_cycles_left = 9; + u64 *gpioval = &dd->ipath_gpio_out; + int ret; + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_gpio_lock, flags); + /* Make sure shadows are consistent */ + dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); + *gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out); + spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); + + ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg " + "is %llx\n", (unsigned long long) *gpioval); + + /* + * This is to get the i2c into a known state, by first going low, + * then tristate sda (and then tristate scl as first thing + * in loop) + */ + scl_out(dd, i2c_line_low); + sda_out(dd, i2c_line_high); + + /* Clock up to 9 cycles looking for SDA hi, then issue START and STOP */ + while (clock_cycles_left--) { + scl_out(dd, i2c_line_high); + + /* SDA seen high, issue START by dropping it while SCL high */ + if (sda_in(dd, 0)) { + sda_out(dd, i2c_line_low); + scl_out(dd, i2c_line_low); + /* ATMEL spec says must be followed by STOP. */ + scl_out(dd, i2c_line_high); + sda_out(dd, i2c_line_high); + ret = 0; + goto bail; + } + + scl_out(dd, i2c_line_low); + } + + ret = 1; + +bail: + return ret; +} + +/* + * Probe for I2C device at specified address. Returns 0 for "success" + * to match rest of this file. + * Leave bus in "reasonable" state for further commands. + */ +static int i2c_probe(struct ipath_devdata *dd, int devaddr) +{ + int ret = 0; + + ret = eeprom_reset(dd); + if (ret) { + ipath_dev_err(dd, "Failed reset probing device 0x%02X\n", + devaddr); + return ret; + } + /* + * Reset no longer leaves bus in start condition, so normal + * i2c_startcmd() will do. + */ + ret = i2c_startcmd(dd, devaddr | READ_CMD); + if (ret) + ipath_cdbg(VERBOSE, "Failed startcmd for device 0x%02X\n", + devaddr); + else { + /* + * Device did respond. Complete a single-byte read, because some + * devices apparently cannot handle STOP immediately after they + * ACK the start-cmd. + */ + int data; + data = rd_byte(dd); + stop_cmd(dd); + ipath_cdbg(VERBOSE, "Response from device 0x%02X\n", devaddr); + } + return ret; +} + +/* + * Returns the "i2c type". This is a pointer to a struct that describes + * the I2C chain on this board. To minimize impact on struct ipath_devdata, + * the (small integer) index into the table is actually memoized, rather + * then the pointer. + * Memoization is because the type is determined on the first call per chip. + * An alternative would be to move type determination to early + * init code. + */ +static struct i2c_chain_desc *ipath_i2c_type(struct ipath_devdata *dd) +{ + int idx; + + /* Get memoized index, from previous successful probes */ + idx = dd->ipath_i2c_chain_type - 1; + if (idx >= 0 && idx < (ARRAY_SIZE(i2c_chains) - 1)) + goto done; + + idx = 0; + while (i2c_chains[idx].probe_dev != IPATH_NO_DEV) { + /* if probe succeeds, this is type */ + if (!i2c_probe(dd, i2c_chains[idx].probe_dev)) + break; + ++idx; + } + + /* + * Old EEPROM (first entry) may require a reset after probe, + * rather than being able to "start" after "stop" + */ + if (idx == 0) + eeprom_reset(dd); + + if (i2c_chains[idx].probe_dev == IPATH_NO_DEV) + idx = -1; + else + dd->ipath_i2c_chain_type = idx + 1; +done: + return (idx >= 0) ? i2c_chains + idx : NULL; +} + +static int ipath_eeprom_internal_read(struct ipath_devdata *dd, + u8 eeprom_offset, void *buffer, int len) +{ + int ret; + struct i2c_chain_desc *icd; + u8 *bp = buffer; + + ret = 1; + icd = ipath_i2c_type(dd); + if (!icd) + goto bail; + + if (icd->eeprom_dev == IPATH_NO_DEV) { + /* legacy not-really-I2C */ + ipath_cdbg(VERBOSE, "Start command only address\n"); + eeprom_offset = (eeprom_offset << 1) | READ_CMD; + ret = i2c_startcmd(dd, eeprom_offset); + } else { + /* Actual I2C */ + ipath_cdbg(VERBOSE, "Start command uses devaddr\n"); + if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) { + ipath_dbg("Failed EEPROM startcmd\n"); + stop_cmd(dd); + ret = 1; + goto bail; + } + ret = wr_byte(dd, eeprom_offset); + stop_cmd(dd); + if (ret) { + ipath_dev_err(dd, "Failed to write EEPROM address\n"); + ret = 1; + goto bail; + } + ret = i2c_startcmd(dd, icd->eeprom_dev | READ_CMD); + } + if (ret) { + ipath_dbg("Failed startcmd for dev %02X\n", icd->eeprom_dev); + stop_cmd(dd); + ret = 1; + goto bail; + } + + /* + * eeprom keeps clocking data out as long as we ack, automatically + * incrementing the address. + */ + while (len-- > 0) { + /* get and store data */ + *bp++ = rd_byte(dd); + /* send ack if not the last byte */ + if (len) + send_ack(dd); + } + + stop_cmd(dd); + + ret = 0; + +bail: + return ret; +} + +static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset, + const void *buffer, int len) +{ + int sub_len; + const u8 *bp = buffer; + int max_wait_time, i; + int ret; + struct i2c_chain_desc *icd; + + ret = 1; + icd = ipath_i2c_type(dd); + if (!icd) + goto bail; + + while (len > 0) { + if (icd->eeprom_dev == IPATH_NO_DEV) { + if (i2c_startcmd(dd, + (eeprom_offset << 1) | WRITE_CMD)) { + ipath_dbg("Failed to start cmd offset %u\n", + eeprom_offset); + goto failed_write; + } + } else { + /* Real I2C */ + if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) { + ipath_dbg("Failed EEPROM startcmd\n"); + goto failed_write; + } + ret = wr_byte(dd, eeprom_offset); + if (ret) { + ipath_dev_err(dd, "Failed to write EEPROM " + "address\n"); + goto failed_write; + } + } + + sub_len = min(len, 4); + eeprom_offset += sub_len; + len -= sub_len; + + for (i = 0; i < sub_len; i++) { + if (wr_byte(dd, *bp++)) { + ipath_dbg("no ack after byte %u/%u (%u " + "total remain)\n", i, sub_len, + len + sub_len - i); + goto failed_write; + } + } + + stop_cmd(dd); + + /* + * wait for write complete by waiting for a successful + * read (the chip replies with a zero after the write + * cmd completes, and before it writes to the eeprom. + * The startcmd for the read will fail the ack until + * the writes have completed. We do this inline to avoid + * the debug prints that are in the real read routine + * if the startcmd fails. + * We also use the proper device address, so it doesn't matter + * whether we have real eeprom_dev. legacy likes any address. + */ + max_wait_time = 100; + while (i2c_startcmd(dd, icd->eeprom_dev | READ_CMD)) { + stop_cmd(dd); + if (!--max_wait_time) { + ipath_dbg("Did not get successful read to " + "complete write\n"); + goto failed_write; + } + } + /* now read (and ignore) the resulting byte */ + rd_byte(dd); + stop_cmd(dd); + } + + ret = 0; + goto bail; + +failed_write: + stop_cmd(dd); + ret = 1; + +bail: + return ret; +} + +/** + * ipath_eeprom_read - receives bytes from the eeprom via I2C + * @dd: the infinipath device + * @eeprom_offset: address to read from + * @buffer: where to store result + * @len: number of bytes to receive + */ +int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, + void *buff, int len) +{ + int ret; + + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); + if (!ret) { + ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len); + mutex_unlock(&dd->ipath_eep_lock); + } + + return ret; +} + +/** + * ipath_eeprom_write - writes data to the eeprom via I2C + * @dd: the infinipath device + * @eeprom_offset: where to place data + * @buffer: data to write + * @len: number of bytes to write + */ +int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, + const void *buff, int len) +{ + int ret; + + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); + if (!ret) { + ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len); + mutex_unlock(&dd->ipath_eep_lock); + } + + return ret; +} + +static u8 flash_csum(struct ipath_flash *ifp, int adjust) +{ + u8 *ip = (u8 *) ifp; + u8 csum = 0, len; + + /* + * Limit length checksummed to max length of actual data. + * Checksum of erased eeprom will still be bad, but we avoid + * reading past the end of the buffer we were passed. + */ + len = ifp->if_length; + if (len > sizeof(struct ipath_flash)) + len = sizeof(struct ipath_flash); + while (len--) + csum += *ip++; + csum -= ifp->if_csum; + csum = ~csum; + if (adjust) + ifp->if_csum = csum; + + return csum; +} + +/** + * ipath_get_guid - get the GUID from the i2c device + * @dd: the infinipath device + * + * We have the capability to use the ipath_nguid field, and get + * the guid from the first chip's flash, to use for all of them. + */ +void ipath_get_eeprom_info(struct ipath_devdata *dd) +{ + void *buf; + struct ipath_flash *ifp; + __be64 guid; + int len, eep_stat; + u8 csum, *bguid; + int t = dd->ipath_unit; + struct ipath_devdata *dd0 = ipath_lookup(0); + + if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) { + u8 oguid; + dd->ipath_guid = dd0->ipath_guid; + bguid = (u8 *) & dd->ipath_guid; + + oguid = bguid[7]; + bguid[7] += t; + if (oguid > bguid[7]) { + if (bguid[6] == 0xff) { + if (bguid[5] == 0xff) { + ipath_dev_err( + dd, + "Can't set %s GUID from " + "base, wraps to OUI!\n", + ipath_get_unit_name(t)); + dd->ipath_guid = 0; + goto bail; + } + bguid[5]++; + } + bguid[6]++; + } + dd->ipath_nguid = 1; + + ipath_dbg("nguid %u, so adding %u to device 0 guid, " + "for %llx\n", + dd0->ipath_nguid, t, + (unsigned long long) be64_to_cpu(dd->ipath_guid)); + goto bail; + } + + /* + * read full flash, not just currently used part, since it may have + * been written with a newer definition + * */ + len = sizeof(struct ipath_flash); + buf = vmalloc(len); + if (!buf) { + ipath_dev_err(dd, "Couldn't allocate memory to read %u " + "bytes from eeprom for GUID\n", len); + goto bail; + } + + mutex_lock(&dd->ipath_eep_lock); + eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len); + mutex_unlock(&dd->ipath_eep_lock); + + if (eep_stat) { + ipath_dev_err(dd, "Failed reading GUID from eeprom\n"); + goto done; + } + ifp = (struct ipath_flash *)buf; + + csum = flash_csum(ifp, 0); + if (csum != ifp->if_csum) { + dev_info(&dd->pcidev->dev, "Bad I2C flash checksum: " + "0x%x, not 0x%x\n", csum, ifp->if_csum); + goto done; + } + if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) || + *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) { + ipath_dev_err(dd, "Invalid GUID %llx from flash; " + "ignoring\n", + *(unsigned long long *) ifp->if_guid); + /* don't allow GUID if all 0 or all 1's */ + goto done; + } + + /* complain, but allow it */ + if (*(u64 *) ifp->if_guid == 0x100007511000000ULL) + dev_info(&dd->pcidev->dev, "Warning, GUID %llx is " + "default, probably not correct!\n", + *(unsigned long long *) ifp->if_guid); + + bguid = ifp->if_guid; + if (!bguid[0] && !bguid[1] && !bguid[2]) { + /* original incorrect GUID format in flash; fix in + * core copy, by shifting up 2 octets; don't need to + * change top octet, since both it and shifted are + * 0.. */ + bguid[1] = bguid[3]; + bguid[2] = bguid[4]; + bguid[3] = bguid[4] = 0; + guid = *(__be64 *) ifp->if_guid; + ipath_cdbg(VERBOSE, "Old GUID format in flash, top 3 zero, " + "shifting 2 octets\n"); + } else + guid = *(__be64 *) ifp->if_guid; + dd->ipath_guid = guid; + dd->ipath_nguid = ifp->if_numguid; + /* + * Things are slightly complicated by the desire to transparently + * support both the Pathscale 10-digit serial number and the QLogic + * 13-character version. + */ + if ((ifp->if_fversion > 1) && ifp->if_sprefix[0] + && ((u8 *)ifp->if_sprefix)[0] != 0xFF) { + /* This board has a Serial-prefix, which is stored + * elsewhere for backward-compatibility. + */ + char *snp = dd->ipath_serial; + memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix); + snp[sizeof ifp->if_sprefix] = '\0'; + len = strlen(snp); + snp += len; + len = (sizeof dd->ipath_serial) - len; + if (len > sizeof ifp->if_serial) { + len = sizeof ifp->if_serial; + } + memcpy(snp, ifp->if_serial, len); + } else + memcpy(dd->ipath_serial, ifp->if_serial, + sizeof ifp->if_serial); + if (!strstr(ifp->if_comment, "Tested successfully")) + ipath_dev_err(dd, "Board SN %s did not pass functional " + "test: %s\n", dd->ipath_serial, + ifp->if_comment); + + ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", + (unsigned long long) be64_to_cpu(dd->ipath_guid)); + + memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT); + /* + * Power-on (actually "active") hours are kept as little-endian value + * in EEPROM, but as seconds in a (possibly as small as 24-bit) + * atomic_t while running. + */ + atomic_set(&dd->ipath_active_time, 0); + dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8); + +done: + vfree(buf); + +bail:; +} + +/** + * ipath_update_eeprom_log - copy active-time and error counters to eeprom + * @dd: the infinipath device + * + * Although the time is kept as seconds in the ipath_devdata struct, it is + * rounded to hours for re-write, as we have only 16 bits in EEPROM. + * First-cut code reads whole (expected) struct ipath_flash, modifies, + * re-writes. Future direction: read/write only what we need, assuming + * that the EEPROM had to have been "good enough" for driver init, and + * if not, we aren't making it worse. + * + */ + +int ipath_update_eeprom_log(struct ipath_devdata *dd) +{ + void *buf; + struct ipath_flash *ifp; + int len, hi_water; + uint32_t new_time, new_hrs; + u8 csum; + int ret, idx; + unsigned long flags; + + /* first, check if we actually need to do anything. */ + ret = 0; + for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { + if (dd->ipath_eep_st_new_errs[idx]) { + ret = 1; + break; + } + } + new_time = atomic_read(&dd->ipath_active_time); + + if (ret == 0 && new_time < 3600) + return 0; + + /* + * The quick-check above determined that there is something worthy + * of logging, so get current contents and do a more detailed idea. + * read full flash, not just currently used part, since it may have + * been written with a newer definition + */ + len = sizeof(struct ipath_flash); + buf = vmalloc(len); + ret = 1; + if (!buf) { + ipath_dev_err(dd, "Couldn't allocate memory to read %u " + "bytes from eeprom for logging\n", len); + goto bail; + } + + /* Grab semaphore and read current EEPROM. If we get an + * error, let go, but if not, keep it until we finish write. + */ + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); + if (ret) { + ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n"); + goto free_bail; + } + ret = ipath_eeprom_internal_read(dd, 0, buf, len); + if (ret) { + mutex_unlock(&dd->ipath_eep_lock); + ipath_dev_err(dd, "Unable read EEPROM for logging\n"); + goto free_bail; + } + ifp = (struct ipath_flash *)buf; + + csum = flash_csum(ifp, 0); + if (csum != ifp->if_csum) { + mutex_unlock(&dd->ipath_eep_lock); + ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n", + csum, ifp->if_csum); + ret = 1; + goto free_bail; + } + hi_water = 0; + spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); + for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { + int new_val = dd->ipath_eep_st_new_errs[idx]; + if (new_val) { + /* + * If we have seen any errors, add to EEPROM values + * We need to saturate at 0xFF (255) and we also + * would need to adjust the checksum if we were + * trying to minimize EEPROM traffic + * Note that we add to actual current count in EEPROM, + * in case it was altered while we were running. + */ + new_val += ifp->if_errcntp[idx]; + if (new_val > 0xFF) + new_val = 0xFF; + if (ifp->if_errcntp[idx] != new_val) { + ifp->if_errcntp[idx] = new_val; + hi_water = offsetof(struct ipath_flash, + if_errcntp) + idx; + } + /* + * update our shadow (used to minimize EEPROM + * traffic), to match what we are about to write. + */ + dd->ipath_eep_st_errs[idx] = new_val; + dd->ipath_eep_st_new_errs[idx] = 0; + } + } + /* + * now update active-time. We would like to round to the nearest hour + * but unless atomic_t are sure to be proper signed ints we cannot, + * because we need to account for what we "transfer" to EEPROM and + * if we log an hour at 31 minutes, then we would need to set + * active_time to -29 to accurately count the _next_ hour. + */ + if (new_time >= 3600) { + new_hrs = new_time / 3600; + atomic_sub((new_hrs * 3600), &dd->ipath_active_time); + new_hrs += dd->ipath_eep_hrs; + if (new_hrs > 0xFFFF) + new_hrs = 0xFFFF; + dd->ipath_eep_hrs = new_hrs; + if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) { + ifp->if_powerhour[0] = new_hrs & 0xFF; + hi_water = offsetof(struct ipath_flash, if_powerhour); + } + if ((new_hrs >> 8) != ifp->if_powerhour[1]) { + ifp->if_powerhour[1] = new_hrs >> 8; + hi_water = offsetof(struct ipath_flash, if_powerhour) + + 1; + } + } + /* + * There is a tiny possibility that we could somehow fail to write + * the EEPROM after updating our shadows, but problems from holding + * the spinlock too long are a much bigger issue. + */ + spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); + if (hi_water) { + /* we made some change to the data, uopdate cksum and write */ + csum = flash_csum(ifp, 1); + ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1); + } + mutex_unlock(&dd->ipath_eep_lock); + if (ret) + ipath_dev_err(dd, "Failed updating EEPROM\n"); + +free_bail: + vfree(buf); +bail: + return ret; + +} + +/** + * ipath_inc_eeprom_err - increment one of the four error counters + * that are logged to EEPROM. + * @dd: the infinipath device + * @eidx: 0..3, the counter to increment + * @incr: how much to add + * + * Each counter is 8-bits, and saturates at 255 (0xFF). They + * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log() + * is called, but it can only be called in a context that allows sleep. + * This function can be called even at interrupt level. + */ + +void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr) +{ + uint new_val; + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); + new_val = dd->ipath_eep_st_new_errs[eidx] + incr; + if (new_val > 255) + new_val = 255; + dd->ipath_eep_st_new_errs[eidx] = new_val; + spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); + return; +} + +static int ipath_tempsense_internal_read(struct ipath_devdata *dd, u8 regnum) +{ + int ret; + struct i2c_chain_desc *icd; + + ret = -ENOENT; + + icd = ipath_i2c_type(dd); + if (!icd) + goto bail; + + if (icd->temp_dev == IPATH_NO_DEV) { + /* tempsense only exists on new, real-I2C boards */ + ret = -ENXIO; + goto bail; + } + + if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) { + ipath_dbg("Failed tempsense startcmd\n"); + stop_cmd(dd); + ret = -ENXIO; + goto bail; + } + ret = wr_byte(dd, regnum); + stop_cmd(dd); + if (ret) { + ipath_dev_err(dd, "Failed tempsense WR command %02X\n", + regnum); + ret = -ENXIO; + goto bail; + } + if (i2c_startcmd(dd, icd->temp_dev | READ_CMD)) { + ipath_dbg("Failed tempsense RD startcmd\n"); + stop_cmd(dd); + ret = -ENXIO; + goto bail; + } + /* + * We can only clock out one byte per command, sensibly + */ + ret = rd_byte(dd); + stop_cmd(dd); + +bail: + return ret; +} + +#define VALID_TS_RD_REG_MASK 0xBF + +/** + * ipath_tempsense_read - read register of temp sensor via I2C + * @dd: the infinipath device + * @regnum: register to read from + * + * returns reg contents (0..255) or < 0 for error + */ +int ipath_tempsense_read(struct ipath_devdata *dd, u8 regnum) +{ + int ret; + + if (regnum > 7) + return -EINVAL; + + /* return a bogus value for (the one) register we do not have */ + if (!((1 << regnum) & VALID_TS_RD_REG_MASK)) + return 0; + + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); + if (!ret) { + ret = ipath_tempsense_internal_read(dd, regnum); + mutex_unlock(&dd->ipath_eep_lock); + } + + /* + * There are three possibilities here: + * ret is actual value (0..255) + * ret is -ENXIO or -EINVAL from code in this file + * ret is -EINTR from mutex_lock_interruptible. + */ + return ret; +} + +static int ipath_tempsense_internal_write(struct ipath_devdata *dd, + u8 regnum, u8 data) +{ + int ret = -ENOENT; + struct i2c_chain_desc *icd; + + icd = ipath_i2c_type(dd); + if (!icd) + goto bail; + + if (icd->temp_dev == IPATH_NO_DEV) { + /* tempsense only exists on new, real-I2C boards */ + ret = -ENXIO; + goto bail; + } + if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) { + ipath_dbg("Failed tempsense startcmd\n"); + stop_cmd(dd); + ret = -ENXIO; + goto bail; + } + ret = wr_byte(dd, regnum); + if (ret) { + stop_cmd(dd); + ipath_dev_err(dd, "Failed to write tempsense command %02X\n", + regnum); + ret = -ENXIO; + goto bail; + } + ret = wr_byte(dd, data); + stop_cmd(dd); + ret = i2c_startcmd(dd, icd->temp_dev | READ_CMD); + if (ret) { + ipath_dev_err(dd, "Failed tempsense data wrt to %02X\n", + regnum); + ret = -ENXIO; + } + +bail: + return ret; +} + +#define VALID_TS_WR_REG_MASK ((1 << 9) | (1 << 0xB) | (1 << 0xD)) + +/** + * ipath_tempsense_write - write register of temp sensor via I2C + * @dd: the infinipath device + * @regnum: register to write + * @data: data to write + * + * returns 0 for success or < 0 for error + */ +int ipath_tempsense_write(struct ipath_devdata *dd, u8 regnum, u8 data) +{ + int ret; + + if (regnum > 15 || !((1 << regnum) & VALID_TS_WR_REG_MASK)) + return -EINVAL; + + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); + if (!ret) { + ret = ipath_tempsense_internal_write(dd, regnum, data); + mutex_unlock(&dd->ipath_eep_lock); + } + + /* + * There are three possibilities here: + * ret is 0 for success + * ret is -ENXIO or -EINVAL from code in this file + * ret is -EINTR from mutex_lock_interruptible. + */ + return ret; +} diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c new file mode 100644 index 00000000..736d9edb --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -0,0 +1,2616 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ipath_kernel.h" +#include "ipath_common.h" +#include "ipath_user_sdma.h" + +static int ipath_open(struct inode *, struct file *); +static int ipath_close(struct inode *, struct file *); +static ssize_t ipath_write(struct file *, const char __user *, size_t, + loff_t *); +static ssize_t ipath_writev(struct kiocb *, const struct iovec *, + unsigned long , loff_t); +static unsigned int ipath_poll(struct file *, struct poll_table_struct *); +static int ipath_mmap(struct file *, struct vm_area_struct *); + +static const struct file_operations ipath_file_ops = { + .owner = THIS_MODULE, + .write = ipath_write, + .aio_write = ipath_writev, + .open = ipath_open, + .release = ipath_close, + .poll = ipath_poll, + .mmap = ipath_mmap, + .llseek = noop_llseek, +}; + +/* + * Convert kernel virtual addresses to physical addresses so they don't + * potentially conflict with the chip addresses used as mmap offsets. + * It doesn't really matter what mmap offset we use as long as we can + * interpret it correctly. + */ +static u64 cvt_kvaddr(void *p) +{ + struct page *page; + u64 paddr = 0; + + page = vmalloc_to_page(p); + if (page) + paddr = page_to_pfn(page) << PAGE_SHIFT; + + return paddr; +} + +static int ipath_get_base_info(struct file *fp, + void __user *ubase, size_t ubase_size) +{ + struct ipath_portdata *pd = port_fp(fp); + int ret = 0; + struct ipath_base_info *kinfo = NULL; + struct ipath_devdata *dd = pd->port_dd; + unsigned subport_cnt; + int shared, master; + size_t sz; + + subport_cnt = pd->port_subport_cnt; + if (!subport_cnt) { + shared = 0; + master = 0; + subport_cnt = 1; + } else { + shared = 1; + master = !subport_fp(fp); + } + + sz = sizeof(*kinfo); + /* If port sharing is not requested, allow the old size structure */ + if (!shared) + sz -= 7 * sizeof(u64); + if (ubase_size < sz) { + ipath_cdbg(PROC, + "Base size %zu, need %zu (version mismatch?)\n", + ubase_size, sz); + ret = -EINVAL; + goto bail; + } + + kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL); + if (kinfo == NULL) { + ret = -ENOMEM; + goto bail; + } + + ret = dd->ipath_f_get_base_info(pd, kinfo); + if (ret < 0) + goto bail; + + kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt; + kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize; + kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt; + kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize; + /* + * have to mmap whole thing + */ + kinfo->spi_rcv_egrbuftotlen = + pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; + kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk; + kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen / + pd->port_rcvegrbuf_chunks; + kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt; + if (master) + kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt; + /* + * for this use, may be ipath_cfgports summed over all chips that + * are are configured and present + */ + kinfo->spi_nports = dd->ipath_cfgports; + /* unit (chip/board) our port is on */ + kinfo->spi_unit = dd->ipath_unit; + /* for now, only a single page */ + kinfo->spi_tid_maxsize = PAGE_SIZE; + + /* + * Doing this per port, and based on the skip value, etc. This has + * to be the actual buffer size, since the protocol code treats it + * as an array. + * + * These have to be set to user addresses in the user code via mmap. + * These values are used on return to user code for the mmap target + * addresses only. For 32 bit, same 44 bit address problem, so use + * the physical address, not virtual. Before 2.6.11, using the + * page_address() macro worked, but in 2.6.11, even that returns the + * full 64 bit address (upper bits all 1's). So far, using the + * physical addresses (or chip offsets, for chip mapping) works, but + * no doubt some future kernel release will change that, and we'll be + * on to yet another method of dealing with this. + */ + kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys; + kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys; + kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys; + kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys; + kinfo->spi_status = (u64) kinfo->spi_pioavailaddr + + (void *) dd->ipath_statusp - + (void *) dd->ipath_pioavailregs_dma; + if (!shared) { + kinfo->spi_piocnt = pd->port_piocnt; + kinfo->spi_piobufbase = (u64) pd->port_piobufs; + kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + + dd->ipath_ureg_align * pd->port_port; + } else if (master) { + kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) + + (pd->port_piocnt % subport_cnt); + /* Master's PIO buffers are after all the slave's */ + kinfo->spi_piobufbase = (u64) pd->port_piobufs + + dd->ipath_palign * + (pd->port_piocnt - kinfo->spi_piocnt); + } else { + unsigned slave = subport_fp(fp) - 1; + + kinfo->spi_piocnt = pd->port_piocnt / subport_cnt; + kinfo->spi_piobufbase = (u64) pd->port_piobufs + + dd->ipath_palign * kinfo->spi_piocnt * slave; + } + + if (shared) { + kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase + + dd->ipath_ureg_align * pd->port_port; + kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs; + kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base; + kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr; + + kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase + + PAGE_SIZE * subport_fp(fp)); + + kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * subport_fp(fp)); + kinfo->spi_rcvhdr_tailaddr = 0; + kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf + + pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size * + subport_fp(fp)); + + kinfo->spi_subport_uregbase = + cvt_kvaddr(pd->subport_uregbase); + kinfo->spi_subport_rcvegrbuf = + cvt_kvaddr(pd->subport_rcvegrbuf); + kinfo->spi_subport_rcvhdr_base = + cvt_kvaddr(pd->subport_rcvhdr_base); + ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n", + kinfo->spi_port, kinfo->spi_runtime_flags, + (unsigned long long) kinfo->spi_subport_uregbase, + (unsigned long long) kinfo->spi_subport_rcvegrbuf, + (unsigned long long) kinfo->spi_subport_rcvhdr_base); + } + + /* + * All user buffers are 2KB buffers. If we ever support + * giving 4KB buffers to user processes, this will need some + * work. + */ + kinfo->spi_pioindex = (kinfo->spi_piobufbase - + (dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign; + kinfo->spi_pioalign = dd->ipath_palign; + + kinfo->spi_qpair = IPATH_KD_QP; + /* + * user mode PIO buffers are always 2KB, even when 4KB can + * be received, and sent via the kernel; this is ibmaxlen + * for 2K MTU. + */ + kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32); + kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */ + kinfo->spi_port = pd->port_port; + kinfo->spi_subport = subport_fp(fp); + kinfo->spi_sw_version = IPATH_KERN_SWVERSION; + kinfo->spi_hw_version = dd->ipath_revision; + + if (master) { + kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER; + } + + sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo); + if (copy_to_user(ubase, kinfo, sz)) + ret = -EFAULT; + +bail: + kfree(kinfo); + return ret; +} + +/** + * ipath_tid_update - update a port TID + * @pd: the port + * @fp: the ipath device file + * @ti: the TID information + * + * The new implementation as of Oct 2004 is that the driver assigns + * the tid and returns it to the caller. To make it easier to + * catch bugs, and to reduce search time, we keep a cursor for + * each port, walking the shadow tid array to find one that's not + * in use. + * + * For now, if we can't allocate the full list, we fail, although + * in the long run, we'll allocate as many as we can, and the + * caller will deal with that by trying the remaining pages later. + * That means that when we fail, we have to mark the tids as not in + * use again, in our shadow copy. + * + * It's up to the caller to free the tids when they are done. + * We'll unlock the pages as they free them. + * + * Also, right now we are locking one page at a time, but since + * the intended use of this routine is for a single group of + * virtually contiguous pages, that should change to improve + * performance. + */ +static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp, + const struct ipath_tid_info *ti) +{ + int ret = 0, ntids; + u32 tid, porttid, cnt, i, tidcnt, tidoff; + u16 *tidlist; + struct ipath_devdata *dd = pd->port_dd; + u64 physaddr; + unsigned long vaddr; + u64 __iomem *tidbase; + unsigned long tidmap[8]; + struct page **pagep = NULL; + unsigned subport = subport_fp(fp); + + if (!dd->ipath_pageshadow) { + ret = -ENOMEM; + goto done; + } + + cnt = ti->tidcnt; + if (!cnt) { + ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n", + (unsigned long long) ti->tidlist); + /* + * Should we treat as success? likely a bug + */ + ret = -EFAULT; + goto done; + } + porttid = pd->port_port * dd->ipath_rcvtidcnt; + if (!pd->port_subport_cnt) { + tidcnt = dd->ipath_rcvtidcnt; + tid = pd->port_tidcursor; + tidoff = 0; + } else if (!subport) { + tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + + (dd->ipath_rcvtidcnt % pd->port_subport_cnt); + tidoff = dd->ipath_rcvtidcnt - tidcnt; + porttid += tidoff; + tid = tidcursor_fp(fp); + } else { + tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; + tidoff = tidcnt * (subport - 1); + porttid += tidoff; + tid = tidcursor_fp(fp); + } + if (cnt > tidcnt) { + /* make sure it all fits in port_tid_pg_list */ + dev_info(&dd->pcidev->dev, "Process tried to allocate %u " + "TIDs, only trying max (%u)\n", cnt, tidcnt); + cnt = tidcnt; + } + pagep = &((struct page **) pd->port_tid_pg_list)[tidoff]; + tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff]; + + memset(tidmap, 0, sizeof(tidmap)); + /* before decrement; chip actual # */ + ntids = tidcnt; + tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) + + dd->ipath_rcvtidbase + + porttid * sizeof(*tidbase)); + + ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n", + pd->port_port, cnt, tid, tidbase); + + /* virtual address of first page in transfer */ + vaddr = ti->tidvaddr; + if (!access_ok(VERIFY_WRITE, (void __user *) vaddr, + cnt * PAGE_SIZE)) { + ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n", + (void *)vaddr, cnt); + ret = -EFAULT; + goto done; + } + ret = ipath_get_user_pages(vaddr, cnt, pagep); + if (ret) { + if (ret == -EBUSY) { + ipath_dbg("Failed to lock addr %p, %u pages " + "(already locked)\n", + (void *) vaddr, cnt); + /* + * for now, continue, and see what happens but with + * the new implementation, this should never happen, + * unless perhaps the user has mpin'ed the pages + * themselves (something we need to test) + */ + ret = 0; + } else { + dev_info(&dd->pcidev->dev, + "Failed to lock addr %p, %u pages: " + "errno %d\n", (void *) vaddr, cnt, -ret); + goto done; + } + } + for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) { + for (; ntids--; tid++) { + if (tid == tidcnt) + tid = 0; + if (!dd->ipath_pageshadow[porttid + tid]) + break; + } + if (ntids < 0) { + /* + * oops, wrapped all the way through their TIDs, + * and didn't have enough free; see comments at + * start of routine + */ + ipath_dbg("Not enough free TIDs for %u pages " + "(index %d), failing\n", cnt, i); + i--; /* last tidlist[i] not filled in */ + ret = -ENOMEM; + break; + } + tidlist[i] = tid + tidoff; + ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, " + "vaddr %lx\n", i, tid + tidoff, vaddr); + /* we "know" system pages and TID pages are same size */ + dd->ipath_pageshadow[porttid + tid] = pagep[i]; + dd->ipath_physshadow[porttid + tid] = ipath_map_page( + dd->pcidev, pagep[i], 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + /* + * don't need atomic or it's overhead + */ + __set_bit(tid, tidmap); + physaddr = dd->ipath_physshadow[porttid + tid]; + ipath_stats.sps_pagelocks++; + ipath_cdbg(VERBOSE, + "TID %u, vaddr %lx, physaddr %llx pgp %p\n", + tid, vaddr, (unsigned long long) physaddr, + pagep[i]); + dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED, + physaddr); + /* + * don't check this tid in ipath_portshadow, since we + * just filled it in; start with the next one. + */ + tid++; + } + + if (ret) { + u32 limit; + cleanup: + /* jump here if copy out of updated info failed... */ + ipath_dbg("After failure (ret=%d), undo %d of %d entries\n", + -ret, i, cnt); + /* same code that's in ipath_free_tid() */ + limit = sizeof(tidmap) * BITS_PER_BYTE; + if (limit > tidcnt) + /* just in case size changes in future */ + limit = tidcnt; + tid = find_first_bit((const unsigned long *)tidmap, limit); + for (; tid < limit; tid++) { + if (!test_bit(tid, tidmap)) + continue; + if (dd->ipath_pageshadow[porttid + tid]) { + ipath_cdbg(VERBOSE, "Freeing TID %u\n", + tid); + dd->ipath_f_put_tid(dd, &tidbase[tid], + RCVHQ_RCV_TYPE_EXPECTED, + dd->ipath_tidinvalid); + pci_unmap_page(dd->pcidev, + dd->ipath_physshadow[porttid + tid], + PAGE_SIZE, PCI_DMA_FROMDEVICE); + dd->ipath_pageshadow[porttid + tid] = NULL; + ipath_stats.sps_pageunlocks++; + } + } + ipath_release_user_pages(pagep, cnt); + } else { + /* + * Copy the updated array, with ipath_tid's filled in, back + * to user. Since we did the copy in already, this "should + * never fail" If it does, we have to clean up... + */ + if (copy_to_user((void __user *) + (unsigned long) ti->tidlist, + tidlist, cnt * sizeof(*tidlist))) { + ret = -EFAULT; + goto cleanup; + } + if (copy_to_user((void __user *) (unsigned long) ti->tidmap, + tidmap, sizeof tidmap)) { + ret = -EFAULT; + goto cleanup; + } + if (tid == tidcnt) + tid = 0; + if (!pd->port_subport_cnt) + pd->port_tidcursor = tid; + else + tidcursor_fp(fp) = tid; + } + +done: + if (ret) + ipath_dbg("Failed to map %u TID pages, failing with %d\n", + ti->tidcnt, -ret); + return ret; +} + +/** + * ipath_tid_free - free a port TID + * @pd: the port + * @subport: the subport + * @ti: the TID info + * + * right now we are unlocking one page at a time, but since + * the intended use of this routine is for a single group of + * virtually contiguous pages, that should change to improve + * performance. We check that the TID is in range for this port + * but otherwise don't check validity; if user has an error and + * frees the wrong tid, it's only their own data that can thereby + * be corrupted. We do check that the TID was in use, for sanity + * We always use our idea of the saved address, not the address that + * they pass in to us. + */ + +static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport, + const struct ipath_tid_info *ti) +{ + int ret = 0; + u32 tid, porttid, cnt, limit, tidcnt; + struct ipath_devdata *dd = pd->port_dd; + u64 __iomem *tidbase; + unsigned long tidmap[8]; + + if (!dd->ipath_pageshadow) { + ret = -ENOMEM; + goto done; + } + + if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap, + sizeof tidmap)) { + ret = -EFAULT; + goto done; + } + + porttid = pd->port_port * dd->ipath_rcvtidcnt; + if (!pd->port_subport_cnt) + tidcnt = dd->ipath_rcvtidcnt; + else if (!subport) { + tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + + (dd->ipath_rcvtidcnt % pd->port_subport_cnt); + porttid += dd->ipath_rcvtidcnt - tidcnt; + } else { + tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; + porttid += tidcnt * (subport - 1); + } + tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + + dd->ipath_rcvtidbase + + porttid * sizeof(*tidbase)); + + limit = sizeof(tidmap) * BITS_PER_BYTE; + if (limit > tidcnt) + /* just in case size changes in future */ + limit = tidcnt; + tid = find_first_bit(tidmap, limit); + ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) " + "set is %d, porttid %u\n", pd->port_port, ti->tidcnt, + limit, tid, porttid); + for (cnt = 0; tid < limit; tid++) { + /* + * small optimization; if we detect a run of 3 or so without + * any set, use find_first_bit again. That's mainly to + * accelerate the case where we wrapped, so we have some at + * the beginning, and some at the end, and a big gap + * in the middle. + */ + if (!test_bit(tid, tidmap)) + continue; + cnt++; + if (dd->ipath_pageshadow[porttid + tid]) { + struct page *p; + p = dd->ipath_pageshadow[porttid + tid]; + dd->ipath_pageshadow[porttid + tid] = NULL; + ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n", + pid_nr(pd->port_pid), tid); + dd->ipath_f_put_tid(dd, &tidbase[tid], + RCVHQ_RCV_TYPE_EXPECTED, + dd->ipath_tidinvalid); + pci_unmap_page(dd->pcidev, + dd->ipath_physshadow[porttid + tid], + PAGE_SIZE, PCI_DMA_FROMDEVICE); + ipath_release_user_pages(&p, 1); + ipath_stats.sps_pageunlocks++; + } else + ipath_dbg("Unused tid %u, ignoring\n", tid); + } + if (cnt != ti->tidcnt) + ipath_dbg("passed in tidcnt %d, only %d bits set in map\n", + ti->tidcnt, cnt); +done: + if (ret) + ipath_dbg("Failed to unmap %u TID pages, failing with %d\n", + ti->tidcnt, -ret); + return ret; +} + +/** + * ipath_set_part_key - set a partition key + * @pd: the port + * @key: the key + * + * We can have up to 4 active at a time (other than the default, which is + * always allowed). This is somewhat tricky, since multiple ports may set + * the same key, so we reference count them, and clean up at exit. All 4 + * partition keys are packed into a single infinipath register. It's an + * error for a process to set the same pkey multiple times. We provide no + * mechanism to de-allocate a pkey at this time, we may eventually need to + * do that. I've used the atomic operations, and no locking, and only make + * a single pass through what's available. This should be more than + * adequate for some time. I'll think about spinlocks or the like if and as + * it's necessary. + */ +static int ipath_set_part_key(struct ipath_portdata *pd, u16 key) +{ + struct ipath_devdata *dd = pd->port_dd; + int i, any = 0, pidx = -1; + u16 lkey = key & 0x7FFF; + int ret; + + if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) { + /* nothing to do; this key always valid */ + ret = 0; + goto bail; + } + + ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys " + "%hx:%x %hx:%x %hx:%x %hx:%x\n", + pd->port_port, key, dd->ipath_pkeys[0], + atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1], + atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2], + atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3], + atomic_read(&dd->ipath_pkeyrefs[3])); + + if (!lkey) { + ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n", + pd->port_port); + ret = -EINVAL; + goto bail; + } + + /* + * Set the full membership bit, because it has to be + * set in the register or the packet, and it seems + * cleaner to set in the register than to force all + * callers to set it. (see bug 4331) + */ + key |= 0x8000; + + for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { + if (!pd->port_pkeys[i] && pidx == -1) + pidx = i; + if (pd->port_pkeys[i] == key) { + ipath_cdbg(VERBOSE, "p%u tries to set same pkey " + "(%x) more than once\n", + pd->port_port, key); + ret = -EEXIST; + goto bail; + } + } + if (pidx == -1) { + ipath_dbg("All pkeys for port %u already in use, " + "can't set %x\n", pd->port_port, key); + ret = -EBUSY; + goto bail; + } + for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { + if (!dd->ipath_pkeys[i]) { + any++; + continue; + } + if (dd->ipath_pkeys[i] == key) { + atomic_t *pkrefs = &dd->ipath_pkeyrefs[i]; + + if (atomic_inc_return(pkrefs) > 1) { + pd->port_pkeys[pidx] = key; + ipath_cdbg(VERBOSE, "p%u set key %x " + "matches #%d, count now %d\n", + pd->port_port, key, i, + atomic_read(pkrefs)); + ret = 0; + goto bail; + } else { + /* + * lost race, decrement count, catch below + */ + atomic_dec(pkrefs); + ipath_cdbg(VERBOSE, "Lost race, count was " + "0, after dec, it's %d\n", + atomic_read(pkrefs)); + any++; + } + } + if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) { + /* + * It makes no sense to have both the limited and + * full membership PKEY set at the same time since + * the unlimited one will disable the limited one. + */ + ret = -EEXIST; + goto bail; + } + } + if (!any) { + ipath_dbg("port %u, all pkeys already in use, " + "can't set %x\n", pd->port_port, key); + ret = -EBUSY; + goto bail; + } + for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { + if (!dd->ipath_pkeys[i] && + atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) { + u64 pkey; + + /* for ipathstats, etc. */ + ipath_stats.sps_pkeys[i] = lkey; + pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key; + pkey = + (u64) dd->ipath_pkeys[0] | + ((u64) dd->ipath_pkeys[1] << 16) | + ((u64) dd->ipath_pkeys[2] << 32) | + ((u64) dd->ipath_pkeys[3] << 48); + ipath_cdbg(PROC, "p%u set key %x in #%d, " + "portidx %d, new pkey reg %llx\n", + pd->port_port, key, i, pidx, + (unsigned long long) pkey); + ipath_write_kreg( + dd, dd->ipath_kregs->kr_partitionkey, pkey); + + ret = 0; + goto bail; + } + } + ipath_dbg("port %u, all pkeys already in use 2nd pass, " + "can't set %x\n", pd->port_port, key); + ret = -EBUSY; + +bail: + return ret; +} + +/** + * ipath_manage_rcvq - manage a port's receive queue + * @pd: the port + * @subport: the subport + * @start_stop: action to carry out + * + * start_stop == 0 disables receive on the port, for use in queue + * overflow conditions. start_stop==1 re-enables, to be used to + * re-init the software copy of the head register + */ +static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport, + int start_stop) +{ + struct ipath_devdata *dd = pd->port_dd; + + ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n", + start_stop ? "en" : "dis", dd->ipath_unit, + pd->port_port, subport); + if (subport) + goto bail; + /* atomically clear receive enable port. */ + if (start_stop) { + /* + * On enable, force in-memory copy of the tail register to + * 0, so that protocol code doesn't have to worry about + * whether or not the chip has yet updated the in-memory + * copy or not on return from the system call. The chip + * always resets it's tail register back to 0 on a + * transition from disabled to enabled. This could cause a + * problem if software was broken, and did the enable w/o + * the disable, but eventually the in-memory copy will be + * updated and correct itself, even in the face of software + * bugs. + */ + if (pd->port_rcvhdrtail_kvaddr) + ipath_clear_rcvhdrtail(pd); + set_bit(dd->ipath_r_portenable_shift + pd->port_port, + &dd->ipath_rcvctrl); + } else + clear_bit(dd->ipath_r_portenable_shift + pd->port_port, + &dd->ipath_rcvctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl); + /* now be sure chip saw it before we return */ + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + if (start_stop) { + /* + * And try to be sure that tail reg update has happened too. + * This should in theory interlock with the RXE changes to + * the tail register. Don't assign it to the tail register + * in memory copy, since we could overwrite an update by the + * chip if we did. + */ + ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); + } + /* always; new head should be equal to new tail; see above */ +bail: + return 0; +} + +static void ipath_clean_part_key(struct ipath_portdata *pd, + struct ipath_devdata *dd) +{ + int i, j, pchanged = 0; + u64 oldpkey; + + /* for debugging only */ + oldpkey = (u64) dd->ipath_pkeys[0] | + ((u64) dd->ipath_pkeys[1] << 16) | + ((u64) dd->ipath_pkeys[2] << 32) | + ((u64) dd->ipath_pkeys[3] << 48); + + for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { + if (!pd->port_pkeys[i]) + continue; + ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i, + pd->port_pkeys[i]); + for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) { + /* check for match independent of the global bit */ + if ((dd->ipath_pkeys[j] & 0x7fff) != + (pd->port_pkeys[i] & 0x7fff)) + continue; + if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) { + ipath_cdbg(VERBOSE, "p%u clear key " + "%x matches #%d\n", + pd->port_port, + pd->port_pkeys[i], j); + ipath_stats.sps_pkeys[j] = + dd->ipath_pkeys[j] = 0; + pchanged++; + } + else ipath_cdbg( + VERBOSE, "p%u key %x matches #%d, " + "but ref still %d\n", pd->port_port, + pd->port_pkeys[i], j, + atomic_read(&dd->ipath_pkeyrefs[j])); + break; + } + pd->port_pkeys[i] = 0; + } + if (pchanged) { + u64 pkey = (u64) dd->ipath_pkeys[0] | + ((u64) dd->ipath_pkeys[1] << 16) | + ((u64) dd->ipath_pkeys[2] << 32) | + ((u64) dd->ipath_pkeys[3] << 48); + ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, " + "new pkey reg %llx\n", pd->port_port, + (unsigned long long) oldpkey, + (unsigned long long) pkey); + ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, + pkey); + } +} + +/* + * Initialize the port data with the receive buffer sizes + * so this can be done while the master port is locked. + * Otherwise, there is a race with a slave opening the port + * and seeing these fields uninitialized. + */ +static void init_user_egr_sizes(struct ipath_portdata *pd) +{ + struct ipath_devdata *dd = pd->port_dd; + unsigned egrperchunk, egrcnt, size; + + /* + * to avoid wasting a lot of memory, we allocate 32KB chunks of + * physically contiguous memory, advance through it until used up + * and then allocate more. Of course, we need memory to store those + * extra pointers, now. Started out with 256KB, but under heavy + * memory pressure (creating large files and then copying them over + * NFS while doing lots of MPI jobs), we hit some allocation + * failures, even though we can sleep... (2.6.10) Still get + * failures at 64K. 32K is the lowest we can go without wasting + * additional memory. + */ + size = 0x8000; + egrperchunk = size / dd->ipath_rcvegrbufsize; + egrcnt = dd->ipath_rcvegrcnt; + pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk; + pd->port_rcvegrbufs_perchunk = egrperchunk; + pd->port_rcvegrbuf_size = size; +} + +/** + * ipath_create_user_egr - allocate eager TID buffers + * @pd: the port to allocate TID buffers for + * + * This routine is now quite different for user and kernel, because + * the kernel uses skb's, for the accelerated network performance + * This is the user port version + * + * Allocate the eager TID buffers and program them into infinipath + * They are no longer completely contiguous, we do multiple allocation + * calls. + */ +static int ipath_create_user_egr(struct ipath_portdata *pd) +{ + struct ipath_devdata *dd = pd->port_dd; + unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; + size_t size; + int ret; + gfp_t gfp_flags; + + /* + * GFP_USER, but without GFP_FS, so buffer cache can be + * coalesced (we hope); otherwise, even at order 4, + * heavy filesystem activity makes these fail, and we can + * use compound pages. + */ + gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; + + egrcnt = dd->ipath_rcvegrcnt; + /* TID number offset for this port */ + egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt; + egrsize = dd->ipath_rcvegrbufsize; + ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid " + "offset %x, egrsize %u\n", egrcnt, egroff, egrsize); + + chunk = pd->port_rcvegrbuf_chunks; + egrperchunk = pd->port_rcvegrbufs_perchunk; + size = pd->port_rcvegrbuf_size; + pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]), + GFP_KERNEL); + if (!pd->port_rcvegrbuf) { + ret = -ENOMEM; + goto bail; + } + pd->port_rcvegrbuf_phys = + kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]), + GFP_KERNEL); + if (!pd->port_rcvegrbuf_phys) { + ret = -ENOMEM; + goto bail_rcvegrbuf; + } + for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) { + + pd->port_rcvegrbuf[e] = dma_alloc_coherent( + &dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e], + gfp_flags); + + if (!pd->port_rcvegrbuf[e]) { + ret = -ENOMEM; + goto bail_rcvegrbuf_phys; + } + } + + pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0]; + + for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) { + dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk]; + unsigned i; + + for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) { + dd->ipath_f_put_tid(dd, e + egroff + + (u64 __iomem *) + ((char __iomem *) + dd->ipath_kregbase + + dd->ipath_rcvegrbase), + RCVHQ_RCV_TYPE_EAGER, pa); + pa += egrsize; + } + cond_resched(); /* don't hog the cpu */ + } + + ret = 0; + goto bail; + +bail_rcvegrbuf_phys: + for (e = 0; e < pd->port_rcvegrbuf_chunks && + pd->port_rcvegrbuf[e]; e++) { + dma_free_coherent(&dd->pcidev->dev, size, + pd->port_rcvegrbuf[e], + pd->port_rcvegrbuf_phys[e]); + + } + kfree(pd->port_rcvegrbuf_phys); + pd->port_rcvegrbuf_phys = NULL; +bail_rcvegrbuf: + kfree(pd->port_rcvegrbuf); + pd->port_rcvegrbuf = NULL; +bail: + return ret; +} + + +/* common code for the mappings on dma_alloc_coherent mem */ +static int ipath_mmap_mem(struct vm_area_struct *vma, + struct ipath_portdata *pd, unsigned len, int write_ok, + void *kvaddr, char *what) +{ + struct ipath_devdata *dd = pd->port_dd; + unsigned long pfn; + int ret; + + if ((vma->vm_end - vma->vm_start) > len) { + dev_info(&dd->pcidev->dev, + "FAIL on %s: len %lx > %x\n", what, + vma->vm_end - vma->vm_start, len); + ret = -EFAULT; + goto bail; + } + + if (!write_ok) { + if (vma->vm_flags & VM_WRITE) { + dev_info(&dd->pcidev->dev, + "%s must be mapped readonly\n", what); + ret = -EPERM; + goto bail; + } + + /* don't allow them to later change with mprotect */ + vma->vm_flags &= ~VM_MAYWRITE; + } + + pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT; + ret = remap_pfn_range(vma, vma->vm_start, pfn, + len, vma->vm_page_prot); + if (ret) + dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x " + "bytes r%c failed: %d\n", what, pd->port_port, + pfn, len, write_ok?'w':'o', ret); + else + ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes " + "r%c\n", what, pd->port_port, pfn, len, + write_ok?'w':'o'); +bail: + return ret; +} + +static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd, + u64 ureg) +{ + unsigned long phys; + int ret; + + /* + * This is real hardware, so use io_remap. This is the mechanism + * for the user process to update the head registers for their port + * in the chip. + */ + if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { + dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen " + "%lx > PAGE\n", vma->vm_end - vma->vm_start); + ret = -EFAULT; + } else { + phys = dd->ipath_physaddr + ureg; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; + ret = io_remap_pfn_range(vma, vma->vm_start, + phys >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); + } + return ret; +} + +static int mmap_piobufs(struct vm_area_struct *vma, + struct ipath_devdata *dd, + struct ipath_portdata *pd, + unsigned piobufs, unsigned piocnt) +{ + unsigned long phys; + int ret; + + /* + * When we map the PIO buffers in the chip, we want to map them as + * writeonly, no read possible. This prevents access to previous + * process data, and catches users who might try to read the i/o + * space due to a bug. + */ + if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) { + dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: " + "reqlen %lx > PAGE\n", + vma->vm_end - vma->vm_start); + ret = -EINVAL; + goto bail; + } + + phys = dd->ipath_physaddr + piobufs; + +#if defined(__powerpc__) + /* There isn't a generic way to specify writethrough mappings */ + pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; + pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU; + pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED; +#endif + + /* + * don't allow them to later change to readable with mprotect (for when + * not initially mapped readable, as is normally the case) + */ + vma->vm_flags &= ~VM_MAYREAD; + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; + + ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +bail: + return ret; +} + +static int mmap_rcvegrbufs(struct vm_area_struct *vma, + struct ipath_portdata *pd) +{ + struct ipath_devdata *dd = pd->port_dd; + unsigned long start, size; + size_t total_size, i; + unsigned long pfn; + int ret; + + size = pd->port_rcvegrbuf_size; + total_size = pd->port_rcvegrbuf_chunks * size; + if ((vma->vm_end - vma->vm_start) > total_size) { + dev_info(&dd->pcidev->dev, "FAIL on egr bufs: " + "reqlen %lx > actual %lx\n", + vma->vm_end - vma->vm_start, + (unsigned long) total_size); + ret = -EINVAL; + goto bail; + } + + if (vma->vm_flags & VM_WRITE) { + dev_info(&dd->pcidev->dev, "Can't map eager buffers as " + "writable (flags=%lx)\n", vma->vm_flags); + ret = -EPERM; + goto bail; + } + /* don't allow them to later change to writeable with mprotect */ + vma->vm_flags &= ~VM_MAYWRITE; + + start = vma->vm_start; + + for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) { + pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT; + ret = remap_pfn_range(vma, start, pfn, size, + vma->vm_page_prot); + if (ret < 0) + goto bail; + } + ret = 0; + +bail: + return ret; +} + +/* + * ipath_file_vma_fault - handle a VMA page fault. + */ +static int ipath_file_vma_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct page *page; + + page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT)); + if (!page) + return VM_FAULT_SIGBUS; + get_page(page); + vmf->page = page; + + return 0; +} + +static const struct vm_operations_struct ipath_file_vm_ops = { + .fault = ipath_file_vma_fault, +}; + +static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, + struct ipath_portdata *pd, unsigned subport) +{ + unsigned long len; + struct ipath_devdata *dd; + void *addr; + size_t size; + int ret = 0; + + /* If the port is not shared, all addresses should be physical */ + if (!pd->port_subport_cnt) + goto bail; + + dd = pd->port_dd; + size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; + + /* + * Each process has all the subport uregbase, rcvhdrq, and + * rcvegrbufs mmapped - as an array for all the processes, + * and also separately for this process. + */ + if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) { + addr = pd->subport_uregbase; + size = PAGE_SIZE * pd->port_subport_cnt; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) { + addr = pd->subport_rcvhdr_base; + size = pd->port_rcvhdrq_size * pd->port_subport_cnt; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) { + addr = pd->subport_rcvegrbuf; + size *= pd->port_subport_cnt; + } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase + + PAGE_SIZE * subport)) { + addr = pd->subport_uregbase + PAGE_SIZE * subport; + size = PAGE_SIZE; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * subport)) { + addr = pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * subport; + size = pd->port_rcvhdrq_size; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf + + size * subport)) { + addr = pd->subport_rcvegrbuf + size * subport; + /* rcvegrbufs are read-only on the slave */ + if (vma->vm_flags & VM_WRITE) { + dev_info(&dd->pcidev->dev, + "Can't map eager buffers as " + "writable (flags=%lx)\n", vma->vm_flags); + ret = -EPERM; + goto bail; + } + /* + * Don't allow permission to later change to writeable + * with mprotect. + */ + vma->vm_flags &= ~VM_MAYWRITE; + } else { + goto bail; + } + len = vma->vm_end - vma->vm_start; + if (len > size) { + ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size); + ret = -EINVAL; + goto bail; + } + + vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; + vma->vm_ops = &ipath_file_vm_ops; + vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + ret = 1; + +bail: + return ret; +} + +/** + * ipath_mmap - mmap various structures into user space + * @fp: the file pointer + * @vma: the VM area + * + * We use this to have a shared buffer between the kernel and the user code + * for the rcvhdr queue, egr buffers, and the per-port user regs and pio + * buffers in the chip. We have the open and close entries so we can bump + * the ref count and keep the driver from being unloaded while still mapped. + */ +static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) +{ + struct ipath_portdata *pd; + struct ipath_devdata *dd; + u64 pgaddr, ureg; + unsigned piobufs, piocnt; + int ret; + + pd = port_fp(fp); + if (!pd) { + ret = -EINVAL; + goto bail; + } + dd = pd->port_dd; + + /* + * This is the ipath_do_user_init() code, mapping the shared buffers + * into the user process. The address referred to by vm_pgoff is the + * file offset passed via mmap(). For shared ports, this is the + * kernel vmalloc() address of the pages to share with the master. + * For non-shared or master ports, this is a physical address. + * We only do one mmap for each space mapped. + */ + pgaddr = vma->vm_pgoff << PAGE_SHIFT; + + /* + * Check for 0 in case one of the allocations failed, but user + * called mmap anyway. + */ + if (!pgaddr) { + ret = -EINVAL; + goto bail; + } + + ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n", + (unsigned long long) pgaddr, vma->vm_start, + vma->vm_end - vma->vm_start, dd->ipath_unit, + pd->port_port, subport_fp(fp)); + + /* + * Physical addresses must fit in 40 bits for our hardware. + * Check for kernel virtual addresses first, anything else must + * match a HW or memory address. + */ + ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); + if (ret) { + if (ret > 0) + ret = 0; + goto bail; + } + + ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port; + if (!pd->port_subport_cnt) { + /* port is not shared */ + piocnt = pd->port_piocnt; + piobufs = pd->port_piobufs; + } else if (!subport_fp(fp)) { + /* caller is the master */ + piocnt = (pd->port_piocnt / pd->port_subport_cnt) + + (pd->port_piocnt % pd->port_subport_cnt); + piobufs = pd->port_piobufs + + dd->ipath_palign * (pd->port_piocnt - piocnt); + } else { + unsigned slave = subport_fp(fp) - 1; + + /* caller is a slave */ + piocnt = pd->port_piocnt / pd->port_subport_cnt; + piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; + } + + if (pgaddr == ureg) + ret = mmap_ureg(vma, dd, ureg); + else if (pgaddr == piobufs) + ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt); + else if (pgaddr == dd->ipath_pioavailregs_phys) + /* in-memory copy of pioavail registers */ + ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, + (void *) dd->ipath_pioavailregs_dma, + "pioavail registers"); + else if (pgaddr == pd->port_rcvegr_phys) + ret = mmap_rcvegrbufs(vma, pd); + else if (pgaddr == (u64) pd->port_rcvhdrq_phys) + /* + * The rcvhdrq itself; readonly except on HT (so have + * to allow writable mapping), multiple pages, contiguous + * from an i/o perspective. + */ + ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1, + pd->port_rcvhdrq, + "rcvhdrq"); + else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys) + /* in-memory copy of rcvhdrq tail register */ + ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, + pd->port_rcvhdrtail_kvaddr, + "rcvhdrq tail"); + else + ret = -EINVAL; + + vma->vm_private_data = NULL; + + if (ret < 0) + dev_info(&dd->pcidev->dev, + "Failure %d on off %llx len %lx\n", + -ret, (unsigned long long)pgaddr, + vma->vm_end - vma->vm_start); +bail: + return ret; +} + +static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd) +{ + unsigned pollflag = 0; + + if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) && + pd->port_hdrqfull != pd->port_hdrqfull_poll) { + pollflag |= POLLIN | POLLRDNORM; + pd->port_hdrqfull_poll = pd->port_hdrqfull; + } + + return pollflag; +} + +static unsigned int ipath_poll_urgent(struct ipath_portdata *pd, + struct file *fp, + struct poll_table_struct *pt) +{ + unsigned pollflag = 0; + struct ipath_devdata *dd; + + dd = pd->port_dd; + + /* variable access in ipath_poll_hdrqfull() needs this */ + rmb(); + pollflag = ipath_poll_hdrqfull(pd); + + if (pd->port_urgent != pd->port_urgent_poll) { + pollflag |= POLLIN | POLLRDNORM; + pd->port_urgent_poll = pd->port_urgent; + } + + if (!pollflag) { + /* this saves a spin_lock/unlock in interrupt handler... */ + set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag); + /* flush waiting flag so don't miss an event... */ + wmb(); + poll_wait(fp, &pd->port_wait, pt); + } + + return pollflag; +} + +static unsigned int ipath_poll_next(struct ipath_portdata *pd, + struct file *fp, + struct poll_table_struct *pt) +{ + u32 head; + u32 tail; + unsigned pollflag = 0; + struct ipath_devdata *dd; + + dd = pd->port_dd; + + /* variable access in ipath_poll_hdrqfull() needs this */ + rmb(); + pollflag = ipath_poll_hdrqfull(pd); + + head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port); + if (pd->port_rcvhdrtail_kvaddr) + tail = ipath_get_rcvhdrtail(pd); + else + tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); + + if (head != tail) + pollflag |= POLLIN | POLLRDNORM; + else { + /* this saves a spin_lock/unlock in interrupt handler */ + set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); + /* flush waiting flag so we don't miss an event */ + wmb(); + + set_bit(pd->port_port + dd->ipath_r_intravail_shift, + &dd->ipath_rcvctrl); + + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl); + + if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ + ipath_write_ureg(dd, ur_rcvhdrhead, + dd->ipath_rhdrhead_intr_off | head, + pd->port_port); + + poll_wait(fp, &pd->port_wait, pt); + } + + return pollflag; +} + +static unsigned int ipath_poll(struct file *fp, + struct poll_table_struct *pt) +{ + struct ipath_portdata *pd; + unsigned pollflag; + + pd = port_fp(fp); + if (!pd) + pollflag = 0; + else if (pd->poll_type & IPATH_POLL_TYPE_URGENT) + pollflag = ipath_poll_urgent(pd, fp, pt); + else + pollflag = ipath_poll_next(pd, fp, pt); + + return pollflag; +} + +static int ipath_supports_subports(int user_swmajor, int user_swminor) +{ + /* no subport implementation prior to software version 1.3 */ + return (user_swmajor > 1) || (user_swminor >= 3); +} + +static int ipath_compatible_subports(int user_swmajor, int user_swminor) +{ + /* this code is written long-hand for clarity */ + if (IPATH_USER_SWMAJOR != user_swmajor) { + /* no promise of compatibility if major mismatch */ + return 0; + } + if (IPATH_USER_SWMAJOR == 1) { + switch (IPATH_USER_SWMINOR) { + case 0: + case 1: + case 2: + /* no subport implementation so cannot be compatible */ + return 0; + case 3: + /* 3 is only compatible with itself */ + return user_swminor == 3; + default: + /* >= 4 are compatible (or are expected to be) */ + return user_swminor >= 4; + } + } + /* make no promises yet for future major versions */ + return 0; +} + +static int init_subports(struct ipath_devdata *dd, + struct ipath_portdata *pd, + const struct ipath_user_info *uinfo) +{ + int ret = 0; + unsigned num_subports; + size_t size; + + /* + * If the user is requesting zero subports, + * skip the subport allocation. + */ + if (uinfo->spu_subport_cnt <= 0) + goto bail; + + /* Self-consistency check for ipath_compatible_subports() */ + if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) && + !ipath_compatible_subports(IPATH_USER_SWMAJOR, + IPATH_USER_SWMINOR)) { + dev_info(&dd->pcidev->dev, + "Inconsistent ipath_compatible_subports()\n"); + goto bail; + } + + /* Check for subport compatibility */ + if (!ipath_compatible_subports(uinfo->spu_userversion >> 16, + uinfo->spu_userversion & 0xffff)) { + dev_info(&dd->pcidev->dev, + "Mismatched user version (%d.%d) and driver " + "version (%d.%d) while port sharing. Ensure " + "that driver and library are from the same " + "release.\n", + (int) (uinfo->spu_userversion >> 16), + (int) (uinfo->spu_userversion & 0xffff), + IPATH_USER_SWMAJOR, + IPATH_USER_SWMINOR); + goto bail; + } + if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) { + ret = -EINVAL; + goto bail; + } + + num_subports = uinfo->spu_subport_cnt; + pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports); + if (!pd->subport_uregbase) { + ret = -ENOMEM; + goto bail; + } + /* Note: pd->port_rcvhdrq_size isn't initialized yet. */ + size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * + sizeof(u32), PAGE_SIZE) * num_subports; + pd->subport_rcvhdr_base = vzalloc(size); + if (!pd->subport_rcvhdr_base) { + ret = -ENOMEM; + goto bail_ureg; + } + + pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks * + pd->port_rcvegrbuf_size * + num_subports); + if (!pd->subport_rcvegrbuf) { + ret = -ENOMEM; + goto bail_rhdr; + } + + pd->port_subport_cnt = uinfo->spu_subport_cnt; + pd->port_subport_id = uinfo->spu_subport_id; + pd->active_slaves = 1; + set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); + goto bail; + +bail_rhdr: + vfree(pd->subport_rcvhdr_base); +bail_ureg: + vfree(pd->subport_uregbase); + pd->subport_uregbase = NULL; +bail: + return ret; +} + +static int try_alloc_port(struct ipath_devdata *dd, int port, + struct file *fp, + const struct ipath_user_info *uinfo) +{ + struct ipath_portdata *pd; + int ret; + + if (!(pd = dd->ipath_pd[port])) { + void *ptmp; + + pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL); + + /* + * Allocate memory for use in ipath_tid_update() just once + * at open, not per call. Reduces cost of expected send + * setup. + */ + ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) + + dd->ipath_rcvtidcnt * sizeof(struct page **), + GFP_KERNEL); + if (!pd || !ptmp) { + ipath_dev_err(dd, "Unable to allocate portdata " + "memory, failing open\n"); + ret = -ENOMEM; + kfree(pd); + kfree(ptmp); + goto bail; + } + dd->ipath_pd[port] = pd; + dd->ipath_pd[port]->port_port = port; + dd->ipath_pd[port]->port_dd = dd; + dd->ipath_pd[port]->port_tid_pg_list = ptmp; + init_waitqueue_head(&dd->ipath_pd[port]->port_wait); + } + if (!pd->port_cnt) { + pd->userversion = uinfo->spu_userversion; + init_user_egr_sizes(pd); + if ((ret = init_subports(dd, pd, uinfo)) != 0) + goto bail; + ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n", + current->comm, current->pid, dd->ipath_unit, + port); + pd->port_cnt = 1; + port_fp(fp) = pd; + pd->port_pid = get_pid(task_pid(current)); + strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); + ipath_stats.sps_ports++; + ret = 0; + } else + ret = -EBUSY; + +bail: + return ret; +} + +static inline int usable(struct ipath_devdata *dd) +{ + return dd && + (dd->ipath_flags & IPATH_PRESENT) && + dd->ipath_kregbase && + dd->ipath_lid && + !(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED + | IPATH_LINKUNK)); +} + +static int find_free_port(int unit, struct file *fp, + const struct ipath_user_info *uinfo) +{ + struct ipath_devdata *dd = ipath_lookup(unit); + int ret, i; + + if (!dd) { + ret = -ENODEV; + goto bail; + } + + if (!usable(dd)) { + ret = -ENETDOWN; + goto bail; + } + + for (i = 1; i < dd->ipath_cfgports; i++) { + ret = try_alloc_port(dd, i, fp, uinfo); + if (ret != -EBUSY) + goto bail; + } + ret = -EBUSY; + +bail: + return ret; +} + +static int find_best_unit(struct file *fp, + const struct ipath_user_info *uinfo) +{ + int ret = 0, i, prefunit = -1, devmax; + int maxofallports, npresent, nup; + int ndev; + + devmax = ipath_count_units(&npresent, &nup, &maxofallports); + + /* + * This code is present to allow a knowledgeable person to + * specify the layout of processes to processors before opening + * this driver, and then we'll assign the process to the "closest" + * InfiniPath chip to that processor (we assume reasonable connectivity, + * for now). This code assumes that if affinity has been set + * before this point, that at most one cpu is set; for now this + * is reasonable. I check for both cpumask_empty() and cpumask_full(), + * in case some kernel variant sets none of the bits when no + * affinity is set. 2.6.11 and 12 kernels have all present + * cpus set. Some day we'll have to fix it up further to handle + * a cpu subset. This algorithm fails for two HT chips connected + * in tunnel fashion. Eventually this needs real topology + * information. There may be some issues with dual core numbering + * as well. This needs more work prior to release. + */ + if (!cpumask_empty(tsk_cpus_allowed(current)) && + !cpumask_full(tsk_cpus_allowed(current))) { + int ncpus = num_online_cpus(), curcpu = -1, nset = 0; + get_online_cpus(); + for_each_online_cpu(i) + if (cpumask_test_cpu(i, tsk_cpus_allowed(current))) { + ipath_cdbg(PROC, "%s[%u] affinity set for " + "cpu %d/%d\n", current->comm, + current->pid, i, ncpus); + curcpu = i; + nset++; + } + put_online_cpus(); + if (curcpu != -1 && nset != ncpus) { + if (npresent) { + prefunit = curcpu / (ncpus / npresent); + ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, " + "%d cpus/chip, select unit %d\n", + current->comm, current->pid, + npresent, ncpus, ncpus / npresent, + prefunit); + } + } + } + + /* + * user ports start at 1, kernel port is 0 + * For now, we do round-robin access across all chips + */ + + if (prefunit != -1) + devmax = prefunit + 1; +recheck: + for (i = 1; i < maxofallports; i++) { + for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax; + ndev++) { + struct ipath_devdata *dd = ipath_lookup(ndev); + + if (!usable(dd)) + continue; /* can't use this unit */ + if (i >= dd->ipath_cfgports) + /* + * Maxed out on users of this unit. Try + * next. + */ + continue; + ret = try_alloc_port(dd, i, fp, uinfo); + if (!ret) + goto done; + } + } + + if (npresent) { + if (nup == 0) { + ret = -ENETDOWN; + ipath_dbg("No ports available (none initialized " + "and ready)\n"); + } else { + if (prefunit > 0) { + /* if started above 0, retry from 0 */ + ipath_cdbg(PROC, + "%s[%u] no ports on prefunit " + "%d, clear and re-check\n", + current->comm, current->pid, + prefunit); + devmax = ipath_count_units(NULL, NULL, + NULL); + prefunit = -1; + goto recheck; + } + ret = -EBUSY; + ipath_dbg("No ports available\n"); + } + } else { + ret = -ENXIO; + ipath_dbg("No boards found\n"); + } + +done: + return ret; +} + +static int find_shared_port(struct file *fp, + const struct ipath_user_info *uinfo) +{ + int devmax, ndev, i; + int ret = 0; + + devmax = ipath_count_units(NULL, NULL, NULL); + + for (ndev = 0; ndev < devmax; ndev++) { + struct ipath_devdata *dd = ipath_lookup(ndev); + + if (!usable(dd)) + continue; + for (i = 1; i < dd->ipath_cfgports; i++) { + struct ipath_portdata *pd = dd->ipath_pd[i]; + + /* Skip ports which are not yet open */ + if (!pd || !pd->port_cnt) + continue; + /* Skip port if it doesn't match the requested one */ + if (pd->port_subport_id != uinfo->spu_subport_id) + continue; + /* Verify the sharing process matches the master */ + if (pd->port_subport_cnt != uinfo->spu_subport_cnt || + pd->userversion != uinfo->spu_userversion || + pd->port_cnt >= pd->port_subport_cnt) { + ret = -EINVAL; + goto done; + } + port_fp(fp) = pd; + subport_fp(fp) = pd->port_cnt++; + pd->port_subpid[subport_fp(fp)] = + get_pid(task_pid(current)); + tidcursor_fp(fp) = 0; + pd->active_slaves |= 1 << subport_fp(fp); + ipath_cdbg(PROC, + "%s[%u] %u sharing %s[%u] unit:port %u:%u\n", + current->comm, current->pid, + subport_fp(fp), + pd->port_comm, pid_nr(pd->port_pid), + dd->ipath_unit, pd->port_port); + ret = 1; + goto done; + } + } + +done: + return ret; +} + +static int ipath_open(struct inode *in, struct file *fp) +{ + /* The real work is performed later in ipath_assign_port() */ + fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL); + return fp->private_data ? 0 : -ENOMEM; +} + +/* Get port early, so can set affinity prior to memory allocation */ +static int ipath_assign_port(struct file *fp, + const struct ipath_user_info *uinfo) +{ + int ret; + int i_minor; + unsigned swmajor, swminor; + + /* Check to be sure we haven't already initialized this file */ + if (port_fp(fp)) { + ret = -EINVAL; + goto done; + } + + /* for now, if major version is different, bail */ + swmajor = uinfo->spu_userversion >> 16; + if (swmajor != IPATH_USER_SWMAJOR) { + ipath_dbg("User major version %d not same as driver " + "major %d\n", uinfo->spu_userversion >> 16, + IPATH_USER_SWMAJOR); + ret = -ENODEV; + goto done; + } + + swminor = uinfo->spu_userversion & 0xffff; + if (swminor != IPATH_USER_SWMINOR) + ipath_dbg("User minor version %d not same as driver " + "minor %d\n", swminor, IPATH_USER_SWMINOR); + + mutex_lock(&ipath_mutex); + + if (ipath_compatible_subports(swmajor, swminor) && + uinfo->spu_subport_cnt && + (ret = find_shared_port(fp, uinfo))) { + if (ret > 0) + ret = 0; + goto done_chk_sdma; + } + + i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE; + ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n", + (long)fp->f_path.dentry->d_inode->i_rdev, i_minor); + + if (i_minor) + ret = find_free_port(i_minor - 1, fp, uinfo); + else + ret = find_best_unit(fp, uinfo); + +done_chk_sdma: + if (!ret) { + struct ipath_filedata *fd = fp->private_data; + const struct ipath_portdata *pd = fd->pd; + const struct ipath_devdata *dd = pd->port_dd; + + fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev, + dd->ipath_unit, + pd->port_port, + fd->subport); + + if (!fd->pq) + ret = -ENOMEM; + } + + mutex_unlock(&ipath_mutex); + +done: + return ret; +} + + +static int ipath_do_user_init(struct file *fp, + const struct ipath_user_info *uinfo) +{ + int ret; + struct ipath_portdata *pd = port_fp(fp); + struct ipath_devdata *dd; + u32 head32; + + /* Subports don't need to initialize anything since master did it. */ + if (subport_fp(fp)) { + ret = wait_event_interruptible(pd->port_wait, + !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag)); + goto done; + } + + dd = pd->port_dd; + + if (uinfo->spu_rcvhdrsize) { + ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize); + if (ret) + goto done; + } + + /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ + + /* some ports may get extra buffers, calculate that here */ + if (pd->port_port <= dd->ipath_ports_extrabuf) + pd->port_piocnt = dd->ipath_pbufsport + 1; + else + pd->port_piocnt = dd->ipath_pbufsport; + + /* for right now, kernel piobufs are at end, so port 1 is at 0 */ + if (pd->port_port <= dd->ipath_ports_extrabuf) + pd->port_pio_base = (dd->ipath_pbufsport + 1) + * (pd->port_port - 1); + else + pd->port_pio_base = dd->ipath_ports_extrabuf + + dd->ipath_pbufsport * (pd->port_port - 1); + pd->port_piobufs = dd->ipath_piobufbase + + pd->port_pio_base * dd->ipath_palign; + ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u," + " first pio %u\n", pd->port_port, pd->port_piobufs, + pd->port_piocnt, pd->port_pio_base); + ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0); + + /* + * Now allocate the rcvhdr Q and eager TIDs; skip the TID + * array for time being. If pd->port_port > chip-supported, + * we need to do extra stuff here to handle by handling overflow + * through port 0, someday + */ + ret = ipath_create_rcvhdrq(dd, pd); + if (!ret) + ret = ipath_create_user_egr(pd); + if (ret) + goto done; + + /* + * set the eager head register for this port to the current values + * of the tail pointers, since we don't know if they were + * updated on last use of the port. + */ + head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); + ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); + pd->port_lastrcvhdrqtail = -1; + ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n", + pd->port_port, head32); + pd->port_tidcursor = 0; /* start at beginning after open */ + + /* initialize poll variables... */ + pd->port_urgent = 0; + pd->port_urgent_poll = 0; + pd->port_hdrqfull_poll = pd->port_hdrqfull; + + /* + * Now enable the port for receive. + * For chips that are set to DMA the tail register to memory + * when they change (and when the update bit transitions from + * 0 to 1. So for those chips, we turn it off and then back on. + * This will (very briefly) affect any other open ports, but the + * duration is very short, and therefore isn't an issue. We + * explicitly set the in-memory tail copy to 0 beforehand, so we + * don't have to wait to be sure the DMA update has happened + * (chip resets head/tail to 0 on transition to enable). + */ + set_bit(dd->ipath_r_portenable_shift + pd->port_port, + &dd->ipath_rcvctrl); + if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) { + if (pd->port_rcvhdrtail_kvaddr) + ipath_clear_rcvhdrtail(pd); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl & + ~(1ULL << dd->ipath_r_tailupd_shift)); + } + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl); + /* Notify any waiting slaves */ + if (pd->port_subport_cnt) { + clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); + wake_up(&pd->port_wait); + } +done: + return ret; +} + +/** + * unlock_exptid - unlock any expected TID entries port still had in use + * @pd: port + * + * We don't actually update the chip here, because we do a bulk update + * below, using ipath_f_clear_tids. + */ +static void unlock_expected_tids(struct ipath_portdata *pd) +{ + struct ipath_devdata *dd = pd->port_dd; + int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt; + int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt; + + ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n", + pd->port_port); + for (i = port_tidbase; i < maxtid; i++) { + struct page *ps = dd->ipath_pageshadow[i]; + + if (!ps) + continue; + + dd->ipath_pageshadow[i] = NULL; + pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i], + PAGE_SIZE, PCI_DMA_FROMDEVICE); + ipath_release_user_pages_on_close(&ps, 1); + cnt++; + ipath_stats.sps_pageunlocks++; + } + if (cnt) + ipath_cdbg(VERBOSE, "Port %u locked %u expTID entries\n", + pd->port_port, cnt); + + if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks) + ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n", + (unsigned long long) ipath_stats.sps_pagelocks, + (unsigned long long) + ipath_stats.sps_pageunlocks); +} + +static int ipath_close(struct inode *in, struct file *fp) +{ + int ret = 0; + struct ipath_filedata *fd; + struct ipath_portdata *pd; + struct ipath_devdata *dd; + unsigned long flags; + unsigned port; + struct pid *pid; + + ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n", + (long)in->i_rdev, fp->private_data); + + mutex_lock(&ipath_mutex); + + fd = fp->private_data; + fp->private_data = NULL; + pd = fd->pd; + if (!pd) { + mutex_unlock(&ipath_mutex); + goto bail; + } + + dd = pd->port_dd; + + /* drain user sdma queue */ + ipath_user_sdma_queue_drain(dd, fd->pq); + ipath_user_sdma_queue_destroy(fd->pq); + + if (--pd->port_cnt) { + /* + * XXX If the master closes the port before the slave(s), + * revoke the mmap for the eager receive queue so + * the slave(s) don't wait for receive data forever. + */ + pd->active_slaves &= ~(1 << fd->subport); + put_pid(pd->port_subpid[fd->subport]); + pd->port_subpid[fd->subport] = NULL; + mutex_unlock(&ipath_mutex); + goto bail; + } + /* early; no interrupt users after this */ + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); + port = pd->port_port; + dd->ipath_pd[port] = NULL; + pid = pd->port_pid; + pd->port_pid = NULL; + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); + + if (pd->port_rcvwait_to || pd->port_piowait_to + || pd->port_rcvnowait || pd->port_pionowait) { + ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeo; " + "%u rcv %u, pio already\n", + pd->port_port, pd->port_rcvwait_to, + pd->port_piowait_to, pd->port_rcvnowait, + pd->port_pionowait); + pd->port_rcvwait_to = pd->port_piowait_to = + pd->port_rcvnowait = pd->port_pionowait = 0; + } + if (pd->port_flag) { + ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n", + pd->port_port, pd->port_flag); + pd->port_flag = 0; + } + + if (dd->ipath_kregbase) { + /* atomically clear receive enable port and intr avail. */ + clear_bit(dd->ipath_r_portenable_shift + port, + &dd->ipath_rcvctrl); + clear_bit(pd->port_port + dd->ipath_r_intravail_shift, + &dd->ipath_rcvctrl); + ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl); + /* and read back from chip to be sure that nothing + * else is in flight when we do the rest */ + (void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + + /* clean up the pkeys for this port user */ + ipath_clean_part_key(pd, dd); + /* + * be paranoid, and never write 0's to these, just use an + * unused part of the port 0 tail page. Of course, + * rcvhdraddr points to a large chunk of memory, so this + * could still trash things, but at least it won't trash + * page 0, and by disabling the port, it should stop "soon", + * even if a packet or two is in already in flight after we + * disabled the port. + */ + ipath_write_kreg_port(dd, + dd->ipath_kregs->kr_rcvhdrtailaddr, port, + dd->ipath_dummy_hdrq_phys); + ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, + pd->port_port, dd->ipath_dummy_hdrq_phys); + + ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt); + ipath_chg_pioavailkernel(dd, pd->port_pio_base, + pd->port_piocnt, 1); + + dd->ipath_f_clear_tids(dd, pd->port_port); + + if (dd->ipath_pageshadow) + unlock_expected_tids(pd); + ipath_stats.sps_ports--; + ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", + pd->port_comm, pid_nr(pid), + dd->ipath_unit, port); + } + + put_pid(pid); + mutex_unlock(&ipath_mutex); + ipath_free_pddata(dd, pd); /* after releasing the mutex */ + +bail: + kfree(fd); + return ret; +} + +static int ipath_port_info(struct ipath_portdata *pd, u16 subport, + struct ipath_port_info __user *uinfo) +{ + struct ipath_port_info info; + int nup; + int ret; + size_t sz; + + (void) ipath_count_units(NULL, &nup, NULL); + info.num_active = nup; + info.unit = pd->port_dd->ipath_unit; + info.port = pd->port_port; + info.subport = subport; + /* Don't return new fields if old library opened the port. */ + if (ipath_supports_subports(pd->userversion >> 16, + pd->userversion & 0xffff)) { + /* Number of user ports available for this device. */ + info.num_ports = pd->port_dd->ipath_cfgports - 1; + info.num_subports = pd->port_subport_cnt; + sz = sizeof(info); + } else + sz = sizeof(info) - 2 * sizeof(u16); + + if (copy_to_user(uinfo, &info, sz)) { + ret = -EFAULT; + goto bail; + } + ret = 0; + +bail: + return ret; +} + +static int ipath_get_slave_info(struct ipath_portdata *pd, + void __user *slave_mask_addr) +{ + int ret = 0; + + if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32))) + ret = -EFAULT; + return ret; +} + +static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq, + u32 __user *inflightp) +{ + const u32 val = ipath_user_sdma_inflight_counter(pq); + + if (put_user(val, inflightp)) + return -EFAULT; + + return 0; +} + +static int ipath_sdma_get_complete(struct ipath_devdata *dd, + struct ipath_user_sdma_queue *pq, + u32 __user *completep) +{ + u32 val; + int err; + + err = ipath_user_sdma_make_progress(dd, pq); + if (err < 0) + return err; + + val = ipath_user_sdma_complete_counter(pq); + if (put_user(val, completep)) + return -EFAULT; + + return 0; +} + +static ssize_t ipath_write(struct file *fp, const char __user *data, + size_t count, loff_t *off) +{ + const struct ipath_cmd __user *ucmd; + struct ipath_portdata *pd; + const void __user *src; + size_t consumed, copy; + struct ipath_cmd cmd; + ssize_t ret = 0; + void *dest; + + if (count < sizeof(cmd.type)) { + ret = -EINVAL; + goto bail; + } + + ucmd = (const struct ipath_cmd __user *) data; + + if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) { + ret = -EFAULT; + goto bail; + } + + consumed = sizeof(cmd.type); + + switch (cmd.type) { + case IPATH_CMD_ASSIGN_PORT: + case __IPATH_CMD_USER_INIT: + case IPATH_CMD_USER_INIT: + copy = sizeof(cmd.cmd.user_info); + dest = &cmd.cmd.user_info; + src = &ucmd->cmd.user_info; + break; + case IPATH_CMD_RECV_CTRL: + copy = sizeof(cmd.cmd.recv_ctrl); + dest = &cmd.cmd.recv_ctrl; + src = &ucmd->cmd.recv_ctrl; + break; + case IPATH_CMD_PORT_INFO: + copy = sizeof(cmd.cmd.port_info); + dest = &cmd.cmd.port_info; + src = &ucmd->cmd.port_info; + break; + case IPATH_CMD_TID_UPDATE: + case IPATH_CMD_TID_FREE: + copy = sizeof(cmd.cmd.tid_info); + dest = &cmd.cmd.tid_info; + src = &ucmd->cmd.tid_info; + break; + case IPATH_CMD_SET_PART_KEY: + copy = sizeof(cmd.cmd.part_key); + dest = &cmd.cmd.part_key; + src = &ucmd->cmd.part_key; + break; + case __IPATH_CMD_SLAVE_INFO: + copy = sizeof(cmd.cmd.slave_mask_addr); + dest = &cmd.cmd.slave_mask_addr; + src = &ucmd->cmd.slave_mask_addr; + break; + case IPATH_CMD_PIOAVAILUPD: // force an update of PIOAvail reg + copy = 0; + src = NULL; + dest = NULL; + break; + case IPATH_CMD_POLL_TYPE: + copy = sizeof(cmd.cmd.poll_type); + dest = &cmd.cmd.poll_type; + src = &ucmd->cmd.poll_type; + break; + case IPATH_CMD_ARMLAUNCH_CTRL: + copy = sizeof(cmd.cmd.armlaunch_ctrl); + dest = &cmd.cmd.armlaunch_ctrl; + src = &ucmd->cmd.armlaunch_ctrl; + break; + case IPATH_CMD_SDMA_INFLIGHT: + copy = sizeof(cmd.cmd.sdma_inflight); + dest = &cmd.cmd.sdma_inflight; + src = &ucmd->cmd.sdma_inflight; + break; + case IPATH_CMD_SDMA_COMPLETE: + copy = sizeof(cmd.cmd.sdma_complete); + dest = &cmd.cmd.sdma_complete; + src = &ucmd->cmd.sdma_complete; + break; + default: + ret = -EINVAL; + goto bail; + } + + if (copy) { + if ((count - consumed) < copy) { + ret = -EINVAL; + goto bail; + } + + if (copy_from_user(dest, src, copy)) { + ret = -EFAULT; + goto bail; + } + + consumed += copy; + } + + pd = port_fp(fp); + if (!pd && cmd.type != __IPATH_CMD_USER_INIT && + cmd.type != IPATH_CMD_ASSIGN_PORT) { + ret = -EINVAL; + goto bail; + } + + switch (cmd.type) { + case IPATH_CMD_ASSIGN_PORT: + ret = ipath_assign_port(fp, &cmd.cmd.user_info); + if (ret) + goto bail; + break; + case __IPATH_CMD_USER_INIT: + /* backwards compatibility, get port first */ + ret = ipath_assign_port(fp, &cmd.cmd.user_info); + if (ret) + goto bail; + /* and fall through to current version. */ + case IPATH_CMD_USER_INIT: + ret = ipath_do_user_init(fp, &cmd.cmd.user_info); + if (ret) + goto bail; + ret = ipath_get_base_info( + fp, (void __user *) (unsigned long) + cmd.cmd.user_info.spu_base_info, + cmd.cmd.user_info.spu_base_info_size); + break; + case IPATH_CMD_RECV_CTRL: + ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl); + break; + case IPATH_CMD_PORT_INFO: + ret = ipath_port_info(pd, subport_fp(fp), + (struct ipath_port_info __user *) + (unsigned long) cmd.cmd.port_info); + break; + case IPATH_CMD_TID_UPDATE: + ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info); + break; + case IPATH_CMD_TID_FREE: + ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info); + break; + case IPATH_CMD_SET_PART_KEY: + ret = ipath_set_part_key(pd, cmd.cmd.part_key); + break; + case __IPATH_CMD_SLAVE_INFO: + ret = ipath_get_slave_info(pd, + (void __user *) (unsigned long) + cmd.cmd.slave_mask_addr); + break; + case IPATH_CMD_PIOAVAILUPD: + ipath_force_pio_avail_update(pd->port_dd); + break; + case IPATH_CMD_POLL_TYPE: + pd->poll_type = cmd.cmd.poll_type; + break; + case IPATH_CMD_ARMLAUNCH_CTRL: + if (cmd.cmd.armlaunch_ctrl) + ipath_enable_armlaunch(pd->port_dd); + else + ipath_disable_armlaunch(pd->port_dd); + break; + case IPATH_CMD_SDMA_INFLIGHT: + ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp), + (u32 __user *) (unsigned long) + cmd.cmd.sdma_inflight); + break; + case IPATH_CMD_SDMA_COMPLETE: + ret = ipath_sdma_get_complete(pd->port_dd, + user_sdma_queue_fp(fp), + (u32 __user *) (unsigned long) + cmd.cmd.sdma_complete); + break; + } + + if (ret >= 0) + ret = consumed; + +bail: + return ret; +} + +static ssize_t ipath_writev(struct kiocb *iocb, const struct iovec *iov, + unsigned long dim, loff_t off) +{ + struct file *filp = iocb->ki_filp; + struct ipath_filedata *fp = filp->private_data; + struct ipath_portdata *pd = port_fp(filp); + struct ipath_user_sdma_queue *pq = fp->pq; + + if (!dim) + return -EINVAL; + + return ipath_user_sdma_writev(pd->port_dd, pq, iov, dim); +} + +static struct class *ipath_class; + +static int init_cdev(int minor, char *name, const struct file_operations *fops, + struct cdev **cdevp, struct device **devp) +{ + const dev_t dev = MKDEV(IPATH_MAJOR, minor); + struct cdev *cdev = NULL; + struct device *device = NULL; + int ret; + + cdev = cdev_alloc(); + if (!cdev) { + printk(KERN_ERR IPATH_DRV_NAME + ": Could not allocate cdev for minor %d, %s\n", + minor, name); + ret = -ENOMEM; + goto done; + } + + cdev->owner = THIS_MODULE; + cdev->ops = fops; + kobject_set_name(&cdev->kobj, name); + + ret = cdev_add(cdev, dev, 1); + if (ret < 0) { + printk(KERN_ERR IPATH_DRV_NAME + ": Could not add cdev for minor %d, %s (err %d)\n", + minor, name, -ret); + goto err_cdev; + } + + device = device_create(ipath_class, NULL, dev, NULL, name); + + if (IS_ERR(device)) { + ret = PTR_ERR(device); + printk(KERN_ERR IPATH_DRV_NAME ": Could not create " + "device for minor %d, %s (err %d)\n", + minor, name, -ret); + goto err_cdev; + } + + goto done; + +err_cdev: + cdev_del(cdev); + cdev = NULL; + +done: + if (ret >= 0) { + *cdevp = cdev; + *devp = device; + } else { + *cdevp = NULL; + *devp = NULL; + } + + return ret; +} + +int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, + struct cdev **cdevp, struct device **devp) +{ + return init_cdev(minor, name, fops, cdevp, devp); +} + +static void cleanup_cdev(struct cdev **cdevp, + struct device **devp) +{ + struct device *dev = *devp; + + if (dev) { + device_unregister(dev); + *devp = NULL; + } + + if (*cdevp) { + cdev_del(*cdevp); + *cdevp = NULL; + } +} + +void ipath_cdev_cleanup(struct cdev **cdevp, + struct device **devp) +{ + cleanup_cdev(cdevp, devp); +} + +static struct cdev *wildcard_cdev; +static struct device *wildcard_dev; + +static const dev_t dev = MKDEV(IPATH_MAJOR, 0); + +static int user_init(void) +{ + int ret; + + ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME); + if (ret < 0) { + printk(KERN_ERR IPATH_DRV_NAME ": Could not register " + "chrdev region (err %d)\n", -ret); + goto done; + } + + ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME); + + if (IS_ERR(ipath_class)) { + ret = PTR_ERR(ipath_class); + printk(KERN_ERR IPATH_DRV_NAME ": Could not create " + "device class (err %d)\n", -ret); + goto bail; + } + + goto done; +bail: + unregister_chrdev_region(dev, IPATH_NMINORS); +done: + return ret; +} + +static void user_cleanup(void) +{ + if (ipath_class) { + class_destroy(ipath_class); + ipath_class = NULL; + } + + unregister_chrdev_region(dev, IPATH_NMINORS); +} + +static atomic_t user_count = ATOMIC_INIT(0); +static atomic_t user_setup = ATOMIC_INIT(0); + +int ipath_user_add(struct ipath_devdata *dd) +{ + char name[10]; + int ret; + + if (atomic_inc_return(&user_count) == 1) { + ret = user_init(); + if (ret < 0) { + ipath_dev_err(dd, "Unable to set up user support: " + "error %d\n", -ret); + goto bail; + } + ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev, + &wildcard_dev); + if (ret < 0) { + ipath_dev_err(dd, "Could not create wildcard " + "minor: error %d\n", -ret); + goto bail_user; + } + + atomic_set(&user_setup, 1); + } + + snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit); + + ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops, + &dd->user_cdev, &dd->user_dev); + if (ret < 0) + ipath_dev_err(dd, "Could not create user minor %d, %s\n", + dd->ipath_unit + 1, name); + + goto bail; + +bail_user: + user_cleanup(); +bail: + return ret; +} + +void ipath_user_remove(struct ipath_devdata *dd) +{ + cleanup_cdev(&dd->user_cdev, &dd->user_dev); + + if (atomic_dec_return(&user_count) == 0) { + if (atomic_read(&user_setup) == 0) + goto bail; + + cleanup_cdev(&wildcard_cdev, &wildcard_dev); + user_cleanup(); + + atomic_set(&user_setup, 0); + } +bail: + return; +} diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c new file mode 100644 index 00000000..a4de9d58 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ipath_kernel.h" + +#define IPATHFS_MAGIC 0x726a77 + +static struct super_block *ipath_super; + +static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, + umode_t mode, const struct file_operations *fops, + void *data) +{ + int error; + struct inode *inode = new_inode(dir->i_sb); + + if (!inode) { + error = -EPERM; + goto bail; + } + + inode->i_ino = get_next_ino(); + inode->i_mode = mode; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_private = data; + if (S_ISDIR(mode)) { + inode->i_op = &simple_dir_inode_operations; + inc_nlink(inode); + inc_nlink(dir); + } + + inode->i_fop = fops; + + d_instantiate(dentry, inode); + error = 0; + +bail: + return error; +} + +static int create_file(const char *name, umode_t mode, + struct dentry *parent, struct dentry **dentry, + const struct file_operations *fops, void *data) +{ + int error; + + *dentry = NULL; + mutex_lock(&parent->d_inode->i_mutex); + *dentry = lookup_one_len(name, parent, strlen(name)); + if (!IS_ERR(*dentry)) + error = ipathfs_mknod(parent->d_inode, *dentry, + mode, fops, data); + else + error = PTR_ERR(*dentry); + mutex_unlock(&parent->d_inode->i_mutex); + + return error; +} + +static ssize_t atomic_stats_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(buf, count, ppos, &ipath_stats, + sizeof ipath_stats); +} + +static const struct file_operations atomic_stats_ops = { + .read = atomic_stats_read, + .llseek = default_llseek, +}; + +static ssize_t atomic_counters_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct infinipath_counters counters; + struct ipath_devdata *dd; + + dd = file->f_path.dentry->d_inode->i_private; + dd->ipath_f_read_counters(dd, &counters); + + return simple_read_from_buffer(buf, count, ppos, &counters, + sizeof counters); +} + +static const struct file_operations atomic_counters_ops = { + .read = atomic_counters_read, + .llseek = default_llseek, +}; + +static ssize_t flash_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct ipath_devdata *dd; + ssize_t ret; + loff_t pos; + char *tmp; + + pos = *ppos; + + if ( pos < 0) { + ret = -EINVAL; + goto bail; + } + + if (pos >= sizeof(struct ipath_flash)) { + ret = 0; + goto bail; + } + + if (count > sizeof(struct ipath_flash) - pos) + count = sizeof(struct ipath_flash) - pos; + + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) { + ret = -ENOMEM; + goto bail; + } + + dd = file->f_path.dentry->d_inode->i_private; + if (ipath_eeprom_read(dd, pos, tmp, count)) { + ipath_dev_err(dd, "failed to read from flash\n"); + ret = -ENXIO; + goto bail_tmp; + } + + if (copy_to_user(buf, tmp, count)) { + ret = -EFAULT; + goto bail_tmp; + } + + *ppos = pos + count; + ret = count; + +bail_tmp: + kfree(tmp); + +bail: + return ret; +} + +static ssize_t flash_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct ipath_devdata *dd; + ssize_t ret; + loff_t pos; + char *tmp; + + pos = *ppos; + + if (pos != 0) { + ret = -EINVAL; + goto bail; + } + + if (count != sizeof(struct ipath_flash)) { + ret = -EINVAL; + goto bail; + } + + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) { + ret = -ENOMEM; + goto bail; + } + + if (copy_from_user(tmp, buf, count)) { + ret = -EFAULT; + goto bail_tmp; + } + + dd = file->f_path.dentry->d_inode->i_private; + if (ipath_eeprom_write(dd, pos, tmp, count)) { + ret = -ENXIO; + ipath_dev_err(dd, "failed to write to flash\n"); + goto bail_tmp; + } + + *ppos = pos + count; + ret = count; + +bail_tmp: + kfree(tmp); + +bail: + return ret; +} + +static const struct file_operations flash_ops = { + .read = flash_read, + .write = flash_write, + .llseek = default_llseek, +}; + +static int create_device_files(struct super_block *sb, + struct ipath_devdata *dd) +{ + struct dentry *dir, *tmp; + char unit[10]; + int ret; + + snprintf(unit, sizeof unit, "%02d", dd->ipath_unit); + ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir, + &simple_dir_operations, dd); + if (ret) { + printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret); + goto bail; + } + + ret = create_file("atomic_counters", S_IFREG|S_IRUGO, dir, &tmp, + &atomic_counters_ops, dd); + if (ret) { + printk(KERN_ERR "create_file(%s/atomic_counters) " + "failed: %d\n", unit, ret); + goto bail; + } + + ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp, + &flash_ops, dd); + if (ret) { + printk(KERN_ERR "create_file(%s/flash) " + "failed: %d\n", unit, ret); + goto bail; + } + +bail: + return ret; +} + +static int remove_file(struct dentry *parent, char *name) +{ + struct dentry *tmp; + int ret; + + tmp = lookup_one_len(name, parent, strlen(name)); + + if (IS_ERR(tmp)) { + ret = PTR_ERR(tmp); + goto bail; + } + + spin_lock(&tmp->d_lock); + if (!(d_unhashed(tmp) && tmp->d_inode)) { + dget_dlock(tmp); + __d_drop(tmp); + spin_unlock(&tmp->d_lock); + simple_unlink(parent->d_inode, tmp); + } else + spin_unlock(&tmp->d_lock); + + ret = 0; +bail: + /* + * We don't expect clients to care about the return value, but + * it's there if they need it. + */ + return ret; +} + +static int remove_device_files(struct super_block *sb, + struct ipath_devdata *dd) +{ + struct dentry *dir, *root; + char unit[10]; + int ret; + + root = dget(sb->s_root); + mutex_lock(&root->d_inode->i_mutex); + snprintf(unit, sizeof unit, "%02d", dd->ipath_unit); + dir = lookup_one_len(unit, root, strlen(unit)); + + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + printk(KERN_ERR "Lookup of %s failed\n", unit); + goto bail; + } + + remove_file(dir, "flash"); + remove_file(dir, "atomic_counters"); + d_delete(dir); + ret = simple_rmdir(root->d_inode, dir); + +bail: + mutex_unlock(&root->d_inode->i_mutex); + dput(root); + return ret; +} + +static int ipathfs_fill_super(struct super_block *sb, void *data, + int silent) +{ + struct ipath_devdata *dd, *tmp; + unsigned long flags; + int ret; + + static struct tree_descr files[] = { + [2] = {"atomic_stats", &atomic_stats_ops, S_IRUGO}, + {""}, + }; + + ret = simple_fill_super(sb, IPATHFS_MAGIC, files); + if (ret) { + printk(KERN_ERR "simple_fill_super failed: %d\n", ret); + goto bail; + } + + spin_lock_irqsave(&ipath_devs_lock, flags); + + list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { + spin_unlock_irqrestore(&ipath_devs_lock, flags); + ret = create_device_files(sb, dd); + if (ret) + goto bail; + spin_lock_irqsave(&ipath_devs_lock, flags); + } + + spin_unlock_irqrestore(&ipath_devs_lock, flags); + +bail: + return ret; +} + +static struct dentry *ipathfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + struct dentry *ret; + ret = mount_single(fs_type, flags, data, ipathfs_fill_super); + if (!IS_ERR(ret)) + ipath_super = ret->d_sb; + return ret; +} + +static void ipathfs_kill_super(struct super_block *s) +{ + kill_litter_super(s); + ipath_super = NULL; +} + +int ipathfs_add_device(struct ipath_devdata *dd) +{ + int ret; + + if (ipath_super == NULL) { + ret = 0; + goto bail; + } + + ret = create_device_files(ipath_super, dd); + +bail: + return ret; +} + +int ipathfs_remove_device(struct ipath_devdata *dd) +{ + int ret; + + if (ipath_super == NULL) { + ret = 0; + goto bail; + } + + ret = remove_device_files(ipath_super, dd); + +bail: + return ret; +} + +static struct file_system_type ipathfs_fs_type = { + .owner = THIS_MODULE, + .name = "ipathfs", + .mount = ipathfs_mount, + .kill_sb = ipathfs_kill_super, +}; + +int __init ipath_init_ipathfs(void) +{ + return register_filesystem(&ipathfs_fs_type); +} + +void __exit ipath_exit_ipathfs(void) +{ + unregister_filesystem(&ipathfs_fs_type); +} diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c new file mode 100644 index 00000000..1d7aea13 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -0,0 +1,1941 @@ +/* + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This file contains all of the code that is specific to the InfiniPath + * HT chip. + */ + +#include +#include +#include +#include +#include + +#include "ipath_kernel.h" +#include "ipath_registers.h" + +static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64); + + +/* + * This lists the InfiniPath registers, in the actual chip layout. + * This structure should never be directly accessed. + * + * The names are in InterCap form because they're taken straight from + * the chip specification. Since they're only used in this file, they + * don't pollute the rest of the source. +*/ + +struct _infinipath_do_not_use_kernel_regs { + unsigned long long Revision; + unsigned long long Control; + unsigned long long PageAlign; + unsigned long long PortCnt; + unsigned long long DebugPortSelect; + unsigned long long DebugPort; + unsigned long long SendRegBase; + unsigned long long UserRegBase; + unsigned long long CounterRegBase; + unsigned long long Scratch; + unsigned long long ReservedMisc1; + unsigned long long InterruptConfig; + unsigned long long IntBlocked; + unsigned long long IntMask; + unsigned long long IntStatus; + unsigned long long IntClear; + unsigned long long ErrorMask; + unsigned long long ErrorStatus; + unsigned long long ErrorClear; + unsigned long long HwErrMask; + unsigned long long HwErrStatus; + unsigned long long HwErrClear; + unsigned long long HwDiagCtrl; + unsigned long long MDIO; + unsigned long long IBCStatus; + unsigned long long IBCCtrl; + unsigned long long ExtStatus; + unsigned long long ExtCtrl; + unsigned long long GPIOOut; + unsigned long long GPIOMask; + unsigned long long GPIOStatus; + unsigned long long GPIOClear; + unsigned long long RcvCtrl; + unsigned long long RcvBTHQP; + unsigned long long RcvHdrSize; + unsigned long long RcvHdrCnt; + unsigned long long RcvHdrEntSize; + unsigned long long RcvTIDBase; + unsigned long long RcvTIDCnt; + unsigned long long RcvEgrBase; + unsigned long long RcvEgrCnt; + unsigned long long RcvBufBase; + unsigned long long RcvBufSize; + unsigned long long RxIntMemBase; + unsigned long long RxIntMemSize; + unsigned long long RcvPartitionKey; + unsigned long long ReservedRcv[10]; + unsigned long long SendCtrl; + unsigned long long SendPIOBufBase; + unsigned long long SendPIOSize; + unsigned long long SendPIOBufCnt; + unsigned long long SendPIOAvailAddr; + unsigned long long TxIntMemBase; + unsigned long long TxIntMemSize; + unsigned long long ReservedSend[9]; + unsigned long long SendBufferError; + unsigned long long SendBufferErrorCONT1; + unsigned long long SendBufferErrorCONT2; + unsigned long long SendBufferErrorCONT3; + unsigned long long ReservedSBE[4]; + unsigned long long RcvHdrAddr0; + unsigned long long RcvHdrAddr1; + unsigned long long RcvHdrAddr2; + unsigned long long RcvHdrAddr3; + unsigned long long RcvHdrAddr4; + unsigned long long RcvHdrAddr5; + unsigned long long RcvHdrAddr6; + unsigned long long RcvHdrAddr7; + unsigned long long RcvHdrAddr8; + unsigned long long ReservedRHA[7]; + unsigned long long RcvHdrTailAddr0; + unsigned long long RcvHdrTailAddr1; + unsigned long long RcvHdrTailAddr2; + unsigned long long RcvHdrTailAddr3; + unsigned long long RcvHdrTailAddr4; + unsigned long long RcvHdrTailAddr5; + unsigned long long RcvHdrTailAddr6; + unsigned long long RcvHdrTailAddr7; + unsigned long long RcvHdrTailAddr8; + unsigned long long ReservedRHTA[7]; + unsigned long long Sync; /* Software only */ + unsigned long long Dump; /* Software only */ + unsigned long long SimVer; /* Software only */ + unsigned long long ReservedSW[5]; + unsigned long long SerdesConfig0; + unsigned long long SerdesConfig1; + unsigned long long SerdesStatus; + unsigned long long XGXSConfig; + unsigned long long ReservedSW2[4]; +}; + +struct _infinipath_do_not_use_counters { + __u64 LBIntCnt; + __u64 LBFlowStallCnt; + __u64 Reserved1; + __u64 TxUnsupVLErrCnt; + __u64 TxDataPktCnt; + __u64 TxFlowPktCnt; + __u64 TxDwordCnt; + __u64 TxLenErrCnt; + __u64 TxMaxMinLenErrCnt; + __u64 TxUnderrunCnt; + __u64 TxFlowStallCnt; + __u64 TxDroppedPktCnt; + __u64 RxDroppedPktCnt; + __u64 RxDataPktCnt; + __u64 RxFlowPktCnt; + __u64 RxDwordCnt; + __u64 RxLenErrCnt; + __u64 RxMaxMinLenErrCnt; + __u64 RxICRCErrCnt; + __u64 RxVCRCErrCnt; + __u64 RxFlowCtrlErrCnt; + __u64 RxBadFormatCnt; + __u64 RxLinkProblemCnt; + __u64 RxEBPCnt; + __u64 RxLPCRCErrCnt; + __u64 RxBufOvflCnt; + __u64 RxTIDFullErrCnt; + __u64 RxTIDValidErrCnt; + __u64 RxPKeyMismatchCnt; + __u64 RxP0HdrEgrOvflCnt; + __u64 RxP1HdrEgrOvflCnt; + __u64 RxP2HdrEgrOvflCnt; + __u64 RxP3HdrEgrOvflCnt; + __u64 RxP4HdrEgrOvflCnt; + __u64 RxP5HdrEgrOvflCnt; + __u64 RxP6HdrEgrOvflCnt; + __u64 RxP7HdrEgrOvflCnt; + __u64 RxP8HdrEgrOvflCnt; + __u64 Reserved6; + __u64 Reserved7; + __u64 IBStatusChangeCnt; + __u64 IBLinkErrRecoveryCnt; + __u64 IBLinkDownedCnt; + __u64 IBSymbolErrCnt; +}; + +#define IPATH_KREG_OFFSET(field) (offsetof( \ + struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) +#define IPATH_CREG_OFFSET(field) (offsetof( \ + struct _infinipath_do_not_use_counters, field) / sizeof(u64)) + +static const struct ipath_kregs ipath_ht_kregs = { + .kr_control = IPATH_KREG_OFFSET(Control), + .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase), + .kr_debugport = IPATH_KREG_OFFSET(DebugPort), + .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect), + .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear), + .kr_errormask = IPATH_KREG_OFFSET(ErrorMask), + .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus), + .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl), + .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus), + .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear), + .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask), + .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut), + .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus), + .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl), + .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear), + .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask), + .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus), + .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl), + .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus), + .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked), + .kr_intclear = IPATH_KREG_OFFSET(IntClear), + .kr_interruptconfig = IPATH_KREG_OFFSET(InterruptConfig), + .kr_intmask = IPATH_KREG_OFFSET(IntMask), + .kr_intstatus = IPATH_KREG_OFFSET(IntStatus), + .kr_mdio = IPATH_KREG_OFFSET(MDIO), + .kr_pagealign = IPATH_KREG_OFFSET(PageAlign), + .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey), + .kr_portcnt = IPATH_KREG_OFFSET(PortCnt), + .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP), + .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase), + .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize), + .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl), + .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase), + .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt), + .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt), + .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize), + .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize), + .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase), + .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize), + .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase), + .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt), + .kr_revision = IPATH_KREG_OFFSET(Revision), + .kr_scratch = IPATH_KREG_OFFSET(Scratch), + .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError), + .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl), + .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr), + .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase), + .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt), + .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize), + .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase), + .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase), + .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize), + .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase), + .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0), + .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1), + .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus), + .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig), + /* + * These should not be used directly via ipath_write_kreg64(), + * use them with ipath_write_kreg64_port(), + */ + .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), + .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0) +}; + +static const struct ipath_cregs ipath_ht_cregs = { + .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt), + .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt), + .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt), + .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt), + .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt), + .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt), + .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt), + .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt), + .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt), + .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt), + .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt), + .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt), + /* calc from Reg_CounterRegBase + offset */ + .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt), + .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt), + .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt), + .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt), + .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt), + .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt), + .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt), + .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt), + .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt), + .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt), + .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt), + .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt), + .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt), + .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt), + .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt), + .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt), + .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt), + .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt), + .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt), + .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt), + .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt) +}; + +/* kr_intstatus, kr_intclear, kr_intmask bits */ +#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1) +#define INFINIPATH_I_RCVURG_SHIFT 0 +#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1) +#define INFINIPATH_I_RCVAVAIL_SHIFT 12 + +/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ +#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0 +#define INFINIPATH_HWE_HTCMEMPARITYERR_MASK 0x3FFFFFULL +#define INFINIPATH_HWE_HTCLNKABYTE0CRCERR 0x0000000000800000ULL +#define INFINIPATH_HWE_HTCLNKABYTE1CRCERR 0x0000000001000000ULL +#define INFINIPATH_HWE_HTCLNKBBYTE0CRCERR 0x0000000002000000ULL +#define INFINIPATH_HWE_HTCLNKBBYTE1CRCERR 0x0000000004000000ULL +#define INFINIPATH_HWE_HTCMISCERR4 0x0000000008000000ULL +#define INFINIPATH_HWE_HTCMISCERR5 0x0000000010000000ULL +#define INFINIPATH_HWE_HTCMISCERR6 0x0000000020000000ULL +#define INFINIPATH_HWE_HTCMISCERR7 0x0000000040000000ULL +#define INFINIPATH_HWE_HTCBUSTREQPARITYERR 0x0000000080000000ULL +#define INFINIPATH_HWE_HTCBUSTRESPPARITYERR 0x0000000100000000ULL +#define INFINIPATH_HWE_HTCBUSIREQPARITYERR 0x0000000200000000ULL +#define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL +#define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL +#define INFINIPATH_HWE_HTBPLL_FBSLIP 0x0200000000000000ULL +#define INFINIPATH_HWE_HTBPLL_RFSLIP 0x0400000000000000ULL +#define INFINIPATH_HWE_HTAPLL_FBSLIP 0x0800000000000000ULL +#define INFINIPATH_HWE_HTAPLL_RFSLIP 0x1000000000000000ULL +#define INFINIPATH_HWE_SERDESPLLFAILED 0x2000000000000000ULL + +#define IBA6110_IBCS_LINKTRAININGSTATE_MASK 0xf +#define IBA6110_IBCS_LINKSTATE_SHIFT 4 + +/* kr_extstatus bits */ +#define INFINIPATH_EXTS_FREQSEL 0x2 +#define INFINIPATH_EXTS_SERDESSEL 0x4 +#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 +#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000 + + +/* TID entries (memory), HT-only */ +#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ +#define INFINIPATH_RT_VALID 0x8000000000000000ULL +#define INFINIPATH_RT_ADDR_SHIFT 0 +#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL +#define INFINIPATH_RT_BUFSIZE_SHIFT 48 + +#define INFINIPATH_R_INTRAVAIL_SHIFT 16 +#define INFINIPATH_R_TAILUPD_SHIFT 31 + +/* kr_xgxsconfig bits */ +#define INFINIPATH_XGXS_RESET 0x7ULL + +/* + * masks and bits that are different in different chips, or present only + * in one + */ +static const ipath_err_t infinipath_hwe_htcmemparityerr_mask = + INFINIPATH_HWE_HTCMEMPARITYERR_MASK; +static const ipath_err_t infinipath_hwe_htcmemparityerr_shift = + INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT; + +static const ipath_err_t infinipath_hwe_htclnkabyte0crcerr = + INFINIPATH_HWE_HTCLNKABYTE0CRCERR; +static const ipath_err_t infinipath_hwe_htclnkabyte1crcerr = + INFINIPATH_HWE_HTCLNKABYTE1CRCERR; +static const ipath_err_t infinipath_hwe_htclnkbbyte0crcerr = + INFINIPATH_HWE_HTCLNKBBYTE0CRCERR; +static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr = + INFINIPATH_HWE_HTCLNKBBYTE1CRCERR; + +#define _IPATH_GPIO_SDA_NUM 1 +#define _IPATH_GPIO_SCL_NUM 0 + +#define IPATH_GPIO_SDA \ + (1ULL << (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) +#define IPATH_GPIO_SCL \ + (1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) + +/* keep the code below somewhat more readable; not used elsewhere */ +#define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \ + infinipath_hwe_htclnkabyte1crcerr) +#define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr | \ + infinipath_hwe_htclnkbbyte1crcerr) +#define _IPATH_HTLANE0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \ + infinipath_hwe_htclnkbbyte0crcerr) +#define _IPATH_HTLANE1_CRCBITS (infinipath_hwe_htclnkabyte1crcerr | \ + infinipath_hwe_htclnkbbyte1crcerr) + +static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs, + char *msg, size_t msgl) +{ + char bitsmsg[64]; + ipath_err_t crcbits = hwerrs & + (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS); + /* don't check if 8bit HT */ + if (dd->ipath_flags & IPATH_8BIT_IN_HT0) + crcbits &= ~infinipath_hwe_htclnkabyte1crcerr; + /* don't check if 8bit HT */ + if (dd->ipath_flags & IPATH_8BIT_IN_HT1) + crcbits &= ~infinipath_hwe_htclnkbbyte1crcerr; + /* + * we'll want to ignore link errors on link that is + * not in use, if any. For now, complain about both + */ + if (crcbits) { + u16 ctrl0, ctrl1; + snprintf(bitsmsg, sizeof bitsmsg, + "[HT%s lane %s CRC (%llx); powercycle to completely clear]", + !(crcbits & _IPATH_HTLINK1_CRCBITS) ? + "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS) + ? "1 (B)" : "0+1 (A+B)"), + !(crcbits & _IPATH_HTLANE1_CRCBITS) ? "0" + : (!(crcbits & _IPATH_HTLANE0_CRCBITS) ? "1" : + "0+1"), (unsigned long long) crcbits); + strlcat(msg, bitsmsg, msgl); + + /* + * print extra info for debugging. slave/primary + * config word 4, 8 (link control 0, 1) + */ + + if (pci_read_config_word(dd->pcidev, + dd->ipath_ht_slave_off + 0x4, + &ctrl0)) + dev_info(&dd->pcidev->dev, "Couldn't read " + "linkctrl0 of slave/primary " + "config block\n"); + else if (!(ctrl0 & 1 << 6)) + /* not if EOC bit set */ + ipath_dbg("HT linkctrl0 0x%x%s%s\n", ctrl0, + ((ctrl0 >> 8) & 7) ? " CRC" : "", + ((ctrl0 >> 4) & 1) ? "linkfail" : + ""); + if (pci_read_config_word(dd->pcidev, + dd->ipath_ht_slave_off + 0x8, + &ctrl1)) + dev_info(&dd->pcidev->dev, "Couldn't read " + "linkctrl1 of slave/primary " + "config block\n"); + else if (!(ctrl1 & 1 << 6)) + /* not if EOC bit set */ + ipath_dbg("HT linkctrl1 0x%x%s%s\n", ctrl1, + ((ctrl1 >> 8) & 7) ? " CRC" : "", + ((ctrl1 >> 4) & 1) ? "linkfail" : + ""); + + /* disable until driver reloaded */ + dd->ipath_hwerrmask &= ~crcbits; + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, + dd->ipath_hwerrmask); + ipath_dbg("HT crc errs: %s\n", msg); + } else + ipath_dbg("ignoring HT crc errors 0x%llx, " + "not in use\n", (unsigned long long) + (hwerrs & (_IPATH_HTLINK0_CRCBITS | + _IPATH_HTLINK1_CRCBITS))); +} + +/* 6110 specific hardware errors... */ +static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = { + INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"), + INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"), + INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"), + INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"), + INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"), + INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"), + INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"), + INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), +}; + +#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \ + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) +#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \ + << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) + +static void ipath_ht_txe_recover(struct ipath_devdata *dd) +{ + ++ipath_stats.sps_txeparity; + dev_info(&dd->pcidev->dev, + "Recovering from TXE PIO parity error\n"); +} + + +/** + * ipath_ht_handle_hwerrors - display hardware errors. + * @dd: the infinipath device + * @msg: the output buffer + * @msgl: the size of the output buffer + * + * Use same msg buffer as regular errors to avoid excessive stack + * use. Most hardware errors are catastrophic, but for right now, + * we'll print them and continue. We reuse the same message buffer as + * ipath_handle_errors() to avoid excessive stack usage. + */ +static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, + size_t msgl) +{ + ipath_err_t hwerrs; + u32 bits, ctrl; + int isfatal = 0; + char bitsmsg[64]; + int log_idx; + + hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); + + if (!hwerrs) { + ipath_cdbg(VERBOSE, "Called but no hardware errors set\n"); + /* + * better than printing cofusing messages + * This seems to be related to clearing the crc error, or + * the pll error during init. + */ + goto bail; + } else if (hwerrs == -1LL) { + ipath_dev_err(dd, "Read of hardware error status failed " + "(all bits set); ignoring\n"); + goto bail; + } + ipath_stats.sps_hwerrs++; + + /* Always clear the error status register, except MEMBISTFAIL, + * regardless of whether we continue or stop using the chip. + * We want that set so we know it failed, even across driver reload. + * We'll still ignore it in the hwerrmask. We do this partly for + * diagnostics, but also for support */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, + hwerrs&~INFINIPATH_HWE_MEMBISTFAILED); + + hwerrs &= dd->ipath_hwerrmask; + + /* We log some errors to EEPROM, check if we have any of those. */ + for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) + if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log) + ipath_inc_eeprom_err(dd, log_idx, 1); + + /* + * make sure we get this much out, unless told to be quiet, + * it's a parity error we may recover from, + * or it's occurred within the last 5 seconds + */ + if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY | + RXE_EAGER_PARITY)) || + (ipath_debug & __IPATH_VERBDBG)) + dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " + "(cleared)\n", (unsigned long long) hwerrs); + dd->ipath_lasthwerror |= hwerrs; + + if (hwerrs & ~dd->ipath_hwe_bitsextant) + ipath_dev_err(dd, "hwerror interrupt with unknown errors " + "%llx set\n", (unsigned long long) + (hwerrs & ~dd->ipath_hwe_bitsextant)); + + ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); + if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) { + /* + * parity errors in send memory are recoverable, + * just cancel the send (if indicated in * sendbuffererror), + * count the occurrence, unfreeze (if no other handled + * hardware error bits are set), and continue. They can + * occur if a processor speculative read is done to the PIO + * buffer while we are sending a packet, for example. + */ + if (hwerrs & TXE_PIO_PARITY) { + ipath_ht_txe_recover(dd); + hwerrs &= ~TXE_PIO_PARITY; + } + + if (!hwerrs) { + ipath_dbg("Clearing freezemode on ignored or " + "recovered hardware error\n"); + ipath_clear_freeze(dd); + } + } + + *msg = '\0'; + + /* + * may someday want to decode into which bits are which + * functional area for parity errors, etc. + */ + if (hwerrs & (infinipath_hwe_htcmemparityerr_mask + << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT)) { + bits = (u32) ((hwerrs >> + INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) & + INFINIPATH_HWE_HTCMEMPARITYERR_MASK); + snprintf(bitsmsg, sizeof bitsmsg, "[HTC Parity Errs %x] ", + bits); + strlcat(msg, bitsmsg, msgl); + } + + ipath_format_hwerrors(hwerrs, + ipath_6110_hwerror_msgs, + sizeof(ipath_6110_hwerror_msgs) / + sizeof(ipath_6110_hwerror_msgs[0]), + msg, msgl); + + if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS)) + hwerr_crcbits(dd, hwerrs, msg, msgl); + + if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { + strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]", + msgl); + /* ignore from now on, so disable until driver reloaded */ + dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED; + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, + dd->ipath_hwerrmask); + } +#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \ + INFINIPATH_HWE_COREPLL_RFSLIP | \ + INFINIPATH_HWE_HTBPLL_FBSLIP | \ + INFINIPATH_HWE_HTBPLL_RFSLIP | \ + INFINIPATH_HWE_HTAPLL_FBSLIP | \ + INFINIPATH_HWE_HTAPLL_RFSLIP) + + if (hwerrs & _IPATH_PLL_FAIL) { + snprintf(bitsmsg, sizeof bitsmsg, + "[PLL failed (%llx), InfiniPath hardware unusable]", + (unsigned long long) (hwerrs & _IPATH_PLL_FAIL)); + strlcat(msg, bitsmsg, msgl); + /* ignore from now on, so disable until driver reloaded */ + dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL); + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, + dd->ipath_hwerrmask); + } + + if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) { + /* + * If it occurs, it is left masked since the eternal + * interface is unused + */ + dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED; + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, + dd->ipath_hwerrmask); + } + + if (hwerrs) { + /* + * if any set that we aren't ignoring; only + * make the complaint once, in case it's stuck + * or recurring, and we get here multiple + * times. + * force link down, so switch knows, and + * LEDs are turned off + */ + if (dd->ipath_flags & IPATH_INITTED) { + ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); + ipath_setup_ht_setextled(dd, + INFINIPATH_IBCS_L_STATE_DOWN, + INFINIPATH_IBCS_LT_STATE_DISABLED); + ipath_dev_err(dd, "Fatal Hardware Error (freeze " + "mode), no longer usable, SN %.16s\n", + dd->ipath_serial); + isfatal = 1; + } + *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; + /* mark as having had error */ + *dd->ipath_statusp |= IPATH_STATUS_HWERROR; + /* + * mark as not usable, at a minimum until driver + * is reloaded, probably until reboot, since no + * other reset is possible. + */ + dd->ipath_flags &= ~IPATH_INITTED; + } + else + *msg = 0; /* recovered from all of them */ + if (*msg) + ipath_dev_err(dd, "%s hardware error\n", msg); + if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) + /* + * for status file; if no trailing brace is copied, + * we'll know it was truncated. + */ + snprintf(dd->ipath_freezemsg, + dd->ipath_freezelen, "{%s}", msg); + +bail:; +} + +/** + * ipath_ht_boardname - fill in the board name + * @dd: the infinipath device + * @name: the output buffer + * @namelen: the size of the output buffer + * + * fill in the board name, based on the board revision register + */ +static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, + size_t namelen) +{ + char *n = NULL; + u8 boardrev = dd->ipath_boardrev; + int ret = 0; + + switch (boardrev) { + case 5: + /* + * original production board; two production levels, with + * different serial number ranges. See ipath_ht_early_init() for + * case where we enable IPATH_GPIO_INTR for later serial # range. + * Original 112* serial number is no longer supported. + */ + n = "InfiniPath_QHT7040"; + break; + case 7: + /* small form factor production board */ + n = "InfiniPath_QHT7140"; + break; + default: /* don't know, just print the number */ + ipath_dev_err(dd, "Don't yet know about board " + "with ID %u\n", boardrev); + snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u", + boardrev); + break; + } + if (n) + snprintf(name, namelen, "%s", n); + + if (ret) { + ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name); + goto bail; + } + if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || + dd->ipath_minrev > 4)) { + /* + * This version of the driver only supports Rev 3.2 - 3.4 + */ + ipath_dev_err(dd, + "Unsupported InfiniPath hardware revision %u.%u!\n", + dd->ipath_majrev, dd->ipath_minrev); + ret = 1; + goto bail; + } + /* + * pkt/word counters are 32 bit, and therefore wrap fast enough + * that we snapshot them from a timer, and maintain 64 bit shadow + * copies + */ + dd->ipath_flags |= IPATH_32BITCOUNTERS; + dd->ipath_flags |= IPATH_GPIO_INTR; + if (dd->ipath_lbus_speed != 800) + ipath_dev_err(dd, + "Incorrectly configured for HT @ %uMHz\n", + dd->ipath_lbus_speed); + + /* + * set here, not in ipath_init_*_funcs because we have to do + * it after we can read chip registers. + */ + dd->ipath_ureg_align = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign); + +bail: + return ret; +} + +static void ipath_check_htlink(struct ipath_devdata *dd) +{ + u8 linkerr, link_off, i; + + for (i = 0; i < 2; i++) { + link_off = dd->ipath_ht_slave_off + i * 4 + 0xd; + if (pci_read_config_byte(dd->pcidev, link_off, &linkerr)) + dev_info(&dd->pcidev->dev, "Couldn't read " + "linkerror%d of HT slave/primary block\n", + i); + else if (linkerr & 0xf0) { + ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, " + "clearing\n", linkerr >> 4, i); + /* + * writing the linkerr bits that are set should + * clear them + */ + if (pci_write_config_byte(dd->pcidev, link_off, + linkerr)) + ipath_dbg("Failed write to clear HT " + "linkerror%d\n", i); + if (pci_read_config_byte(dd->pcidev, link_off, + &linkerr)) + dev_info(&dd->pcidev->dev, + "Couldn't reread linkerror%d of " + "HT slave/primary block\n", i); + else if (linkerr & 0xf0) + dev_info(&dd->pcidev->dev, + "HT linkerror%d bits 0x%x " + "couldn't be cleared\n", + i, linkerr >> 4); + } + } +} + +static int ipath_setup_ht_reset(struct ipath_devdata *dd) +{ + ipath_dbg("No reset possible for this InfiniPath hardware\n"); + return 0; +} + +#define HT_INTR_DISC_CONFIG 0x80 /* HT interrupt and discovery cap */ +#define HT_INTR_REG_INDEX 2 /* intconfig requires indirect accesses */ + +/* + * Bits 13-15 of command==0 is slave/primary block. Clear any HT CRC + * errors. We only bother to do this at load time, because it's OK if + * it happened before we were loaded (first time after boot/reset), + * but any time after that, it's fatal anyway. Also need to not check + * for upper byte errors if we are in 8 bit mode, so figure out + * our width. For now, at least, also complain if it's 8 bit. + */ +static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev, + int pos, u8 cap_type) +{ + u8 linkwidth = 0, linkerr, link_a_b_off, link_off; + u16 linkctrl = 0; + int i; + + dd->ipath_ht_slave_off = pos; + /* command word, master_host bit */ + /* master host || slave */ + if ((cap_type >> 2) & 1) + link_a_b_off = 4; + else + link_a_b_off = 0; + ipath_cdbg(VERBOSE, "HT%u (Link %c) connected to processor\n", + link_a_b_off ? 1 : 0, + link_a_b_off ? 'B' : 'A'); + + link_a_b_off += pos; + + /* + * check both link control registers; clear both HT CRC sets if + * necessary. + */ + for (i = 0; i < 2; i++) { + link_off = pos + i * 4 + 0x4; + if (pci_read_config_word(pdev, link_off, &linkctrl)) + ipath_dev_err(dd, "Couldn't read HT link control%d " + "register\n", i); + else if (linkctrl & (0xf << 8)) { + ipath_cdbg(VERBOSE, "Clear linkctrl%d CRC Error " + "bits %x\n", i, linkctrl & (0xf << 8)); + /* + * now write them back to clear the error. + */ + pci_write_config_word(pdev, link_off, + linkctrl & (0xf << 8)); + } + } + + /* + * As with HT CRC bits, same for protocol errors that might occur + * during boot. + */ + for (i = 0; i < 2; i++) { + link_off = pos + i * 4 + 0xd; + if (pci_read_config_byte(pdev, link_off, &linkerr)) + dev_info(&pdev->dev, "Couldn't read linkerror%d " + "of HT slave/primary block\n", i); + else if (linkerr & 0xf0) { + ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, " + "clearing\n", linkerr >> 4, i); + /* + * writing the linkerr bits that are set will clear + * them + */ + if (pci_write_config_byte + (pdev, link_off, linkerr)) + ipath_dbg("Failed write to clear HT " + "linkerror%d\n", i); + if (pci_read_config_byte(pdev, link_off, &linkerr)) + dev_info(&pdev->dev, "Couldn't reread " + "linkerror%d of HT slave/primary " + "block\n", i); + else if (linkerr & 0xf0) + dev_info(&pdev->dev, "HT linkerror%d bits " + "0x%x couldn't be cleared\n", + i, linkerr >> 4); + } + } + + /* + * this is just for our link to the host, not devices connected + * through tunnel. + */ + + if (pci_read_config_byte(pdev, link_a_b_off + 7, &linkwidth)) + ipath_dev_err(dd, "Couldn't read HT link width " + "config register\n"); + else { + u32 width; + switch (linkwidth & 7) { + case 5: + width = 4; + break; + case 4: + width = 2; + break; + case 3: + width = 32; + break; + case 1: + width = 16; + break; + case 0: + default: /* if wrong, assume 8 bit */ + width = 8; + break; + } + + dd->ipath_lbus_width = width; + + if (linkwidth != 0x11) { + ipath_dev_err(dd, "Not configured for 16 bit HT " + "(%x)\n", linkwidth); + if (!(linkwidth & 0xf)) { + ipath_dbg("Will ignore HT lane1 errors\n"); + dd->ipath_flags |= IPATH_8BIT_IN_HT0; + } + } + } + + /* + * this is just for our link to the host, not devices connected + * through tunnel. + */ + if (pci_read_config_byte(pdev, link_a_b_off + 0xd, &linkwidth)) + ipath_dev_err(dd, "Couldn't read HT link frequency " + "config register\n"); + else { + u32 speed; + switch (linkwidth & 0xf) { + case 6: + speed = 1000; + break; + case 5: + speed = 800; + break; + case 4: + speed = 600; + break; + case 3: + speed = 500; + break; + case 2: + speed = 400; + break; + case 1: + speed = 300; + break; + default: + /* + * assume reserved and vendor-specific are 200... + */ + case 0: + speed = 200; + break; + } + dd->ipath_lbus_speed = speed; + } + + snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info), + "HyperTransport,%uMHz,x%u\n", + dd->ipath_lbus_speed, + dd->ipath_lbus_width); +} + +static int ipath_ht_intconfig(struct ipath_devdata *dd) +{ + int ret; + + if (dd->ipath_intconfig) { + ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig, + dd->ipath_intconfig); /* interrupt address */ + ret = 0; + } else { + ipath_dev_err(dd, "No interrupts enabled, couldn't setup " + "interrupt address\n"); + ret = -EINVAL; + } + + return ret; +} + +static void ipath_ht_irq_update(struct pci_dev *dev, int irq, + struct ht_irq_msg *msg) +{ + struct ipath_devdata *dd = pci_get_drvdata(dev); + u64 prev_intconfig = dd->ipath_intconfig; + + dd->ipath_intconfig = msg->address_lo; + dd->ipath_intconfig |= ((u64) msg->address_hi) << 32; + + /* + * If the previous value of dd->ipath_intconfig is zero, we're + * getting configured for the first time, and must not program the + * intconfig register here (it will be programmed later, when the + * hardware is ready). Otherwise, we should. + */ + if (prev_intconfig) + ipath_ht_intconfig(dd); +} + +/** + * ipath_setup_ht_config - setup the interruptconfig register + * @dd: the infinipath device + * @pdev: the PCI device + * + * setup the interruptconfig register from the HT config info. + * Also clear CRC errors in HT linkcontrol, if necessary. + * This is done only for the real hardware. It is done before + * chip address space is initted, so can't touch infinipath registers + */ +static int ipath_setup_ht_config(struct ipath_devdata *dd, + struct pci_dev *pdev) +{ + int pos, ret; + + ret = __ht_create_irq(pdev, 0, ipath_ht_irq_update); + if (ret < 0) { + ipath_dev_err(dd, "Couldn't create interrupt handler: " + "err %d\n", ret); + goto bail; + } + dd->ipath_irq = ret; + ret = 0; + + /* + * Handle clearing CRC errors in linkctrl register if necessary. We + * do this early, before we ever enable errors or hardware errors, + * mostly to avoid causing the chip to enter freeze mode. + */ + pos = pci_find_capability(pdev, PCI_CAP_ID_HT); + if (!pos) { + ipath_dev_err(dd, "Couldn't find HyperTransport " + "capability; no interrupts\n"); + ret = -ENODEV; + goto bail; + } + do { + u8 cap_type; + + /* + * The HT capability type byte is 3 bytes after the + * capability byte. + */ + if (pci_read_config_byte(pdev, pos + 3, &cap_type)) { + dev_info(&pdev->dev, "Couldn't read config " + "command @ %d\n", pos); + continue; + } + if (!(cap_type & 0xE0)) + slave_or_pri_blk(dd, pdev, pos, cap_type); + } while ((pos = pci_find_next_capability(pdev, pos, + PCI_CAP_ID_HT))); + + dd->ipath_flags |= IPATH_SWAP_PIOBUFS; + +bail: + return ret; +} + +/** + * ipath_setup_ht_cleanup - clean up any per-chip chip-specific stuff + * @dd: the infinipath device + * + * Called during driver unload. + * This is currently a nop for the HT chip, not for all chips + */ +static void ipath_setup_ht_cleanup(struct ipath_devdata *dd) +{ +} + +/** + * ipath_setup_ht_setextled - set the state of the two external LEDs + * @dd: the infinipath device + * @lst: the L state + * @ltst: the LT state + * + * Set the state of the two external LEDs, to indicate physical and + * logical state of IB link. For this chip (at least with recommended + * board pinouts), LED1 is Green (physical state), and LED2 is Yellow + * (logical state) + * + * Note: We try to match the Mellanox HCA LED behavior as best + * we can. Green indicates physical link state is OK (something is + * plugged in, and we can train). + * Amber indicates the link is logically up (ACTIVE). + * Mellanox further blinks the amber LED to indicate data packet + * activity, but we have no hardware support for that, so it would + * require waking up every 10-20 msecs and checking the counters + * on the chip, and then turning the LED off if appropriate. That's + * visible overhead, so not something we will do. + * + */ +static void ipath_setup_ht_setextled(struct ipath_devdata *dd, + u64 lst, u64 ltst) +{ + u64 extctl; + unsigned long flags = 0; + + /* the diags use the LED to indicate diag info, so we leave + * the external LED alone when the diags are running */ + if (ipath_diag_inuse) + return; + + /* Allow override of LED display for, e.g. Locating system in rack */ + if (dd->ipath_led_override) { + ltst = (dd->ipath_led_override & IPATH_LED_PHYS) + ? INFINIPATH_IBCS_LT_STATE_LINKUP + : INFINIPATH_IBCS_LT_STATE_DISABLED; + lst = (dd->ipath_led_override & IPATH_LED_LOG) + ? INFINIPATH_IBCS_L_STATE_ACTIVE + : INFINIPATH_IBCS_L_STATE_DOWN; + } + + spin_lock_irqsave(&dd->ipath_gpio_lock, flags); + /* + * start by setting both LED control bits to off, then turn + * on the appropriate bit(s). + */ + if (dd->ipath_boardrev == 8) { /* LS/X-1 uses different pins */ + /* + * major difference is that INFINIPATH_EXTC_LEDGBLERR_OFF + * is inverted, because it is normally used to indicate + * a hardware fault at reset, if there were errors + */ + extctl = (dd->ipath_extctrl & ~INFINIPATH_EXTC_LEDGBLOK_ON) + | INFINIPATH_EXTC_LEDGBLERR_OFF; + if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP) + extctl &= ~INFINIPATH_EXTC_LEDGBLERR_OFF; + if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE) + extctl |= INFINIPATH_EXTC_LEDGBLOK_ON; + } + else { + extctl = dd->ipath_extctrl & + ~(INFINIPATH_EXTC_LED1PRIPORT_ON | + INFINIPATH_EXTC_LED2PRIPORT_ON); + if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP) + extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON; + if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE) + extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON; + } + dd->ipath_extctrl = extctl; + ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl); + spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); +} + +static void ipath_init_ht_variables(struct ipath_devdata *dd) +{ + /* + * setup the register offsets, since they are different for each + * chip + */ + dd->ipath_kregs = &ipath_ht_kregs; + dd->ipath_cregs = &ipath_ht_cregs; + + dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; + dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; + dd->ipath_gpio_sda = IPATH_GPIO_SDA; + dd->ipath_gpio_scl = IPATH_GPIO_SCL; + + /* + * Fill in data for field-values that change in newer chips. + * We dynamically specify only the mask for LINKTRAININGSTATE + * and only the shift for LINKSTATE, as they are the only ones + * that change. Also precalculate the 3 link states of interest + * and the combined mask. + */ + dd->ibcs_ls_shift = IBA6110_IBCS_LINKSTATE_SHIFT; + dd->ibcs_lts_mask = IBA6110_IBCS_LINKTRAININGSTATE_MASK; + dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK << + dd->ibcs_ls_shift) | dd->ibcs_lts_mask; + dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP << + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | + (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift); + dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP << + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | + (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift); + dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP << + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | + (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift); + + /* + * Fill in data for ibcc field-values that change in newer chips. + * We dynamically specify only the mask for LINKINITCMD + * and only the shift for LINKCMD and MAXPKTLEN, as they are + * the only ones that change. + */ + dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK; + dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT; + dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT; + + /* Fill in shifts for RcvCtrl. */ + dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT; + dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT; + dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT; + dd->ipath_r_portcfg_shift = 0; /* Not on IBA6110 */ + + dd->ipath_i_bitsextant = + (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) | + (INFINIPATH_I_RCVAVAIL_MASK << + INFINIPATH_I_RCVAVAIL_SHIFT) | + INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT | + INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO; + + dd->ipath_e_bitsextant = + INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC | + INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN | + INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN | + INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR | + INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP | + INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION | + INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | + INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN | + INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK | + INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN | + INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN | + INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT | + INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | + INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED | + INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET | + INFINIPATH_E_HARDWARE; + + dd->ipath_hwe_bitsextant = + (INFINIPATH_HWE_HTCMEMPARITYERR_MASK << + INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) | + (INFINIPATH_HWE_TXEMEMPARITYERR_MASK << + INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) | + (INFINIPATH_HWE_RXEMEMPARITYERR_MASK << + INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) | + INFINIPATH_HWE_HTCLNKABYTE0CRCERR | + INFINIPATH_HWE_HTCLNKABYTE1CRCERR | + INFINIPATH_HWE_HTCLNKBBYTE0CRCERR | + INFINIPATH_HWE_HTCLNKBBYTE1CRCERR | + INFINIPATH_HWE_HTCMISCERR4 | + INFINIPATH_HWE_HTCMISCERR5 | INFINIPATH_HWE_HTCMISCERR6 | + INFINIPATH_HWE_HTCMISCERR7 | + INFINIPATH_HWE_HTCBUSTREQPARITYERR | + INFINIPATH_HWE_HTCBUSTRESPPARITYERR | + INFINIPATH_HWE_HTCBUSIREQPARITYERR | + INFINIPATH_HWE_RXDSYNCMEMPARITYERR | + INFINIPATH_HWE_MEMBISTFAILED | + INFINIPATH_HWE_COREPLL_FBSLIP | + INFINIPATH_HWE_COREPLL_RFSLIP | + INFINIPATH_HWE_HTBPLL_FBSLIP | + INFINIPATH_HWE_HTBPLL_RFSLIP | + INFINIPATH_HWE_HTAPLL_FBSLIP | + INFINIPATH_HWE_HTAPLL_RFSLIP | + INFINIPATH_HWE_SERDESPLLFAILED | + INFINIPATH_HWE_IBCBUSTOSPCPARITYERR | + INFINIPATH_HWE_IBCBUSFRSPCPARITYERR; + + dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; + dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; + dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT; + dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT; + + /* + * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. + * 2 is Some Misc, 3 is reserved for future. + */ + dd->ipath_eep_st_masks[0].hwerrs_to_log = + INFINIPATH_HWE_TXEMEMPARITYERR_MASK << + INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT; + + dd->ipath_eep_st_masks[1].hwerrs_to_log = + INFINIPATH_HWE_RXEMEMPARITYERR_MASK << + INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT; + + dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET; + + dd->delay_mult = 2; /* SDR, 4X, can't change */ + + dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; + dd->ipath_link_speed_supported = IPATH_IB_SDR; + dd->ipath_link_width_enabled = IB_WIDTH_4X; + dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported; + /* these can't change for this chip, so set once */ + dd->ipath_link_width_active = dd->ipath_link_width_enabled; + dd->ipath_link_speed_active = dd->ipath_link_speed_enabled; +} + +/** + * ipath_ht_init_hwerrors - enable hardware errors + * @dd: the infinipath device + * + * now that we have finished initializing everything that might reasonably + * cause a hardware error, and cleared those errors bits as they occur, + * we can enable hardware errors in the mask (potentially enabling + * freeze mode), and enable hardware errors as errors (along with + * everything else) in errormask + */ +static void ipath_ht_init_hwerrors(struct ipath_devdata *dd) +{ + ipath_err_t val; + u64 extsval; + + extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); + + if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) + ipath_dev_err(dd, "MemBIST did not complete!\n"); + if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT) + ipath_dbg("MemBIST corrected\n"); + + ipath_check_htlink(dd); + + /* barring bugs, all hwerrors become interrupts, which can */ + val = -1LL; + /* don't look at crc lane1 if 8 bit */ + if (dd->ipath_flags & IPATH_8BIT_IN_HT0) + val &= ~infinipath_hwe_htclnkabyte1crcerr; + /* don't look at crc lane1 if 8 bit */ + if (dd->ipath_flags & IPATH_8BIT_IN_HT1) + val &= ~infinipath_hwe_htclnkbbyte1crcerr; + + /* + * disable RXDSYNCMEMPARITY because external serdes is unused, + * and therefore the logic will never be used or initialized, + * and uninitialized state will normally result in this error + * being asserted. Similarly for the external serdess pll + * lock signal. + */ + val &= ~(INFINIPATH_HWE_SERDESPLLFAILED | + INFINIPATH_HWE_RXDSYNCMEMPARITYERR); + + /* + * Disable MISCERR4 because of an inversion in the HT core + * logic checking for errors that cause this bit to be set. + * The errata can also cause the protocol error bit to be set + * in the HT config space linkerror register(s). + */ + val &= ~INFINIPATH_HWE_HTCMISCERR4; + + /* + * PLL ignored because unused MDIO interface has a logic problem + */ + if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9) + val &= ~INFINIPATH_HWE_SERDESPLLFAILED; + dd->ipath_hwerrmask = val; +} + + + + +/** + * ipath_ht_bringup_serdes - bring up the serdes + * @dd: the infinipath device + */ +static int ipath_ht_bringup_serdes(struct ipath_devdata *dd) +{ + u64 val, config1; + int ret = 0, change = 0; + + ipath_dbg("Trying to bringup serdes\n"); + + if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) & + INFINIPATH_HWE_SERDESPLLFAILED) + { + ipath_dbg("At start, serdes PLL failed bit set in " + "hwerrstatus, clearing and continuing\n"); + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, + INFINIPATH_HWE_SERDESPLLFAILED); + } + + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); + config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1); + + ipath_cdbg(VERBOSE, "Initial serdes status is config0=%llx " + "config1=%llx, sstatus=%llx xgxs %llx\n", + (unsigned long long) val, (unsigned long long) config1, + (unsigned long long) + ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus), + (unsigned long long) + ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); + + /* force reset on */ + val |= INFINIPATH_SERDC0_RESET_PLL + /* | INFINIPATH_SERDC0_RESET_MASK */ + ; + ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); + udelay(15); /* need pll reset set at least for a bit */ + + if (val & INFINIPATH_SERDC0_RESET_PLL) { + u64 val2 = val &= ~INFINIPATH_SERDC0_RESET_PLL; + /* set lane resets, and tx idle, during pll reset */ + val2 |= INFINIPATH_SERDC0_RESET_MASK | + INFINIPATH_SERDC0_TXIDLE; + ipath_cdbg(VERBOSE, "Clearing serdes PLL reset (writing " + "%llx)\n", (unsigned long long) val2); + ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, + val2); + /* + * be sure chip saw it + */ + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + /* + * need pll reset clear at least 11 usec before lane + * resets cleared; give it a few more + */ + udelay(15); + val = val2; /* for check below */ + } + + if (val & (INFINIPATH_SERDC0_RESET_PLL | + INFINIPATH_SERDC0_RESET_MASK | + INFINIPATH_SERDC0_TXIDLE)) { + val &= ~(INFINIPATH_SERDC0_RESET_PLL | + INFINIPATH_SERDC0_RESET_MASK | + INFINIPATH_SERDC0_TXIDLE); + /* clear them */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, + val); + } + + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); + if (val & INFINIPATH_XGXS_RESET) { + /* normally true after boot */ + val &= ~INFINIPATH_XGXS_RESET; + change = 1; + } + if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) & + INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) { + /* need to compensate for Tx inversion in partner */ + val &= ~(INFINIPATH_XGXS_RX_POL_MASK << + INFINIPATH_XGXS_RX_POL_SHIFT); + val |= dd->ipath_rx_pol_inv << + INFINIPATH_XGXS_RX_POL_SHIFT; + change = 1; + } + if (change) + ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); + + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); + + /* clear current and de-emphasis bits */ + config1 &= ~0x0ffffffff00ULL; + /* set current to 20ma */ + config1 |= 0x00000000000ULL; + /* set de-emphasis to -5.68dB */ + config1 |= 0x0cccc000000ULL; + ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1); + + ipath_cdbg(VERBOSE, "After setup: serdes status is config0=%llx " + "config1=%llx, sstatus=%llx xgxs %llx\n", + (unsigned long long) val, (unsigned long long) config1, + (unsigned long long) + ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus), + (unsigned long long) + ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); + + return ret; /* for now, say we always succeeded */ +} + +/** + * ipath_ht_quiet_serdes - set serdes to txidle + * @dd: the infinipath device + * driver is being unloaded + */ +static void ipath_ht_quiet_serdes(struct ipath_devdata *dd) +{ + u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); + + val |= INFINIPATH_SERDC0_TXIDLE; + ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n", + (unsigned long long) val); + ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); +} + +/** + * ipath_pe_put_tid - write a TID in chip + * @dd: the infinipath device + * @tidptr: pointer to the expected TID (in chip) to update + * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected + * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing + * + * This exists as a separate routine to allow for special locking etc. + * It's used for both the full cleanup on exit, as well as the normal + * setup and teardown. + */ +static void ipath_ht_put_tid(struct ipath_devdata *dd, + u64 __iomem *tidptr, u32 type, + unsigned long pa) +{ + if (!dd->ipath_kregbase) + return; + + if (pa != dd->ipath_tidinvalid) { + if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) { + dev_info(&dd->pcidev->dev, + "physaddr %lx has more than " + "40 bits, using only 40!!!\n", pa); + pa &= INFINIPATH_RT_ADDR_MASK; + } + if (type == RCVHQ_RCV_TYPE_EAGER) + pa |= dd->ipath_tidtemplate; + else { + /* in words (fixed, full page). */ + u64 lenvalid = PAGE_SIZE >> 2; + lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT; + pa |= lenvalid | INFINIPATH_RT_VALID; + } + } + + writeq(pa, tidptr); +} + + +/** + * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager + * @dd: the infinipath device + * @port: the port + * + * Used from ipath_close(), and at chip initialization. + */ +static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port) +{ + u64 __iomem *tidbase; + int i; + + if (!dd->ipath_kregbase) + return; + + ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port); + + /* + * need to invalidate all of the expected TID entries for this + * port, so we don't have valid entries that might somehow get + * used (early in next use of this port, or through some bug) + */ + tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + + dd->ipath_rcvtidbase + + port * dd->ipath_rcvtidcnt * + sizeof(*tidbase)); + for (i = 0; i < dd->ipath_rcvtidcnt; i++) + ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED, + dd->ipath_tidinvalid); + + tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + + dd->ipath_rcvegrbase + + port * dd->ipath_rcvegrcnt * + sizeof(*tidbase)); + + for (i = 0; i < dd->ipath_rcvegrcnt; i++) + ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER, + dd->ipath_tidinvalid); +} + +/** + * ipath_ht_tidtemplate - setup constants for TID updates + * @dd: the infinipath device + * + * We setup stuff that we use a lot, to avoid calculating each time + */ +static void ipath_ht_tidtemplate(struct ipath_devdata *dd) +{ + dd->ipath_tidtemplate = dd->ipath_ibmaxlen >> 2; + dd->ipath_tidtemplate <<= INFINIPATH_RT_BUFSIZE_SHIFT; + dd->ipath_tidtemplate |= INFINIPATH_RT_VALID; + + /* + * work around chip errata bug 7358, by marking invalid tids + * as having max length + */ + dd->ipath_tidinvalid = (-1LL & INFINIPATH_RT_BUFSIZE_MASK) << + INFINIPATH_RT_BUFSIZE_SHIFT; +} + +static int ipath_ht_early_init(struct ipath_devdata *dd) +{ + u32 __iomem *piobuf; + u32 pioincr, val32; + int i; + + /* + * one cache line; long IB headers will spill over into received + * buffer + */ + dd->ipath_rcvhdrentsize = 16; + dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; + + /* + * For HT, we allocate a somewhat overly large eager buffer, + * such that we can guarantee that we can receive the largest + * packet that we can send out. To truly support a 4KB MTU, + * we need to bump this to a large value. To date, other than + * testing, we have never encountered an HCA that can really + * send 4KB MTU packets, so we do not handle that (we'll get + * errors interrupts if we ever see one). + */ + dd->ipath_rcvegrbufsize = dd->ipath_piosize2k; + + /* + * the min() check here is currently a nop, but it may not + * always be, depending on just how we do ipath_rcvegrbufsize + */ + dd->ipath_ibmaxlen = min(dd->ipath_piosize2k, + dd->ipath_rcvegrbufsize); + dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen; + ipath_ht_tidtemplate(dd); + + /* + * zero all the TID entries at startup. We do this for sanity, + * in case of a previous driver crash of some kind, and also + * because the chip powers up with these memories in an unknown + * state. Use portcnt, not cfgports, since this is for the + * full chip, not for current (possibly different) configuration + * value. + * Chip Errata bug 6447 + */ + for (val32 = 0; val32 < dd->ipath_portcnt; val32++) + ipath_ht_clear_tids(dd, val32); + + /* + * write the pbc of each buffer, to be sure it's initialized, then + * cancel all the buffers, and also abort any packets that might + * have been in flight for some reason (the latter is for driver + * unload/reload, but isn't a bad idea at first init). PIO send + * isn't enabled at this point, so there is no danger of sending + * these out on the wire. + * Chip Errata bug 6610 + */ + piobuf = (u32 __iomem *) (((char __iomem *)(dd->ipath_kregbase)) + + dd->ipath_piobufbase); + pioincr = dd->ipath_palign / sizeof(*piobuf); + for (i = 0; i < dd->ipath_piobcnt2k; i++) { + /* + * reasonable word count, just to init pbc + */ + writel(16, piobuf); + piobuf += pioincr; + } + + ipath_get_eeprom_info(dd); + if (dd->ipath_boardrev == 5) { + /* + * Later production QHT7040 has same changes as QHT7140, so + * can use GPIO interrupts. They have serial #'s starting + * with 128, rather than 112. + */ + if (dd->ipath_serial[0] == '1' && + dd->ipath_serial[1] == '2' && + dd->ipath_serial[2] == '8') + dd->ipath_flags |= IPATH_GPIO_INTR; + else { + ipath_dev_err(dd, "Unsupported InfiniPath board " + "(serial number %.16s)!\n", + dd->ipath_serial); + return 1; + } + } + + if (dd->ipath_minrev >= 4) { + /* Rev4+ reports extra errors via internal GPIO pins */ + dd->ipath_flags |= IPATH_GPIO_ERRINTRS; + dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK; + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, + dd->ipath_gpio_mask); + } + + return 0; +} + + +/** + * ipath_init_ht_get_base_info - set chip-specific flags for user code + * @dd: the infinipath device + * @kbase: ipath_base_info pointer + * + * We set the PCIE flag because the lower bandwidth on PCIe vs + * HyperTransport can affect some user packet algorithms. + */ +static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase) +{ + struct ipath_base_info *kinfo = kbase; + + kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT | + IPATH_RUNTIME_PIO_REGSWAPPED; + + if (pd->port_dd->ipath_minrev < 4) + kinfo->spi_runtime_flags |= IPATH_RUNTIME_RCVHDR_COPY; + + return 0; +} + +static void ipath_ht_free_irq(struct ipath_devdata *dd) +{ + free_irq(dd->ipath_irq, dd); + ht_destroy_irq(dd->ipath_irq); + dd->ipath_irq = 0; + dd->ipath_intconfig = 0; +} + +static struct ipath_message_header * +ipath_ht_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr) +{ + return (struct ipath_message_header *) + &rhf_addr[sizeof(u64) / sizeof(u32)]; +} + +static void ipath_ht_config_ports(struct ipath_devdata *dd, ushort cfgports) +{ + dd->ipath_portcnt = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); + dd->ipath_p0_rcvegrcnt = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); +} + +static void ipath_ht_read_counters(struct ipath_devdata *dd, + struct infinipath_counters *cntrs) +{ + cntrs->LBIntCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt)); + cntrs->LBFlowStallCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt)); + cntrs->TxSDmaDescCnt = 0; + cntrs->TxUnsupVLErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt)); + cntrs->TxDataPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt)); + cntrs->TxFlowPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt)); + cntrs->TxDwordCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt)); + cntrs->TxLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt)); + cntrs->TxMaxMinLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt)); + cntrs->TxUnderrunCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt)); + cntrs->TxFlowStallCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt)); + cntrs->TxDroppedPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt)); + cntrs->RxDroppedPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt)); + cntrs->RxDataPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt)); + cntrs->RxFlowPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt)); + cntrs->RxDwordCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt)); + cntrs->RxLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt)); + cntrs->RxMaxMinLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt)); + cntrs->RxICRCErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt)); + cntrs->RxVCRCErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt)); + cntrs->RxFlowCtrlErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt)); + cntrs->RxBadFormatCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt)); + cntrs->RxLinkProblemCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt)); + cntrs->RxEBPCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt)); + cntrs->RxLPCRCErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt)); + cntrs->RxBufOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt)); + cntrs->RxTIDFullErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt)); + cntrs->RxTIDValidErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt)); + cntrs->RxPKeyMismatchCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt)); + cntrs->RxP0HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt)); + cntrs->RxP1HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt)); + cntrs->RxP2HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt)); + cntrs->RxP3HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt)); + cntrs->RxP4HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt)); + cntrs->RxP5HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP5HdrEgrOvflCnt)); + cntrs->RxP6HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP6HdrEgrOvflCnt)); + cntrs->RxP7HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP7HdrEgrOvflCnt)); + cntrs->RxP8HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP8HdrEgrOvflCnt)); + cntrs->RxP9HdrEgrOvflCnt = 0; + cntrs->RxP10HdrEgrOvflCnt = 0; + cntrs->RxP11HdrEgrOvflCnt = 0; + cntrs->RxP12HdrEgrOvflCnt = 0; + cntrs->RxP13HdrEgrOvflCnt = 0; + cntrs->RxP14HdrEgrOvflCnt = 0; + cntrs->RxP15HdrEgrOvflCnt = 0; + cntrs->RxP16HdrEgrOvflCnt = 0; + cntrs->IBStatusChangeCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt)); + cntrs->IBLinkErrRecoveryCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt)); + cntrs->IBLinkDownedCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt)); + cntrs->IBSymbolErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt)); + cntrs->RxVL15DroppedPktCnt = 0; + cntrs->RxOtherLocalPhyErrCnt = 0; + cntrs->PcieRetryBufDiagQwordCnt = 0; + cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs; + cntrs->LocalLinkIntegrityErrCnt = + (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? + dd->ipath_lli_errs : dd->ipath_lli_errors; + cntrs->RxVlErrCnt = 0; + cntrs->RxDlidFltrCnt = 0; +} + + +/* no interrupt fallback for these chips */ +static int ipath_ht_nointr_fallback(struct ipath_devdata *dd) +{ + return 0; +} + + +/* + * reset the XGXS (between serdes and IBC). Slightly less intrusive + * than resetting the IBC or external link state, and useful in some + * cases to cause some retraining. To do this right, we reset IBC + * as well. + */ +static void ipath_ht_xgxs_reset(struct ipath_devdata *dd) +{ + u64 val, prev_val; + + prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); + val = prev_val | INFINIPATH_XGXS_RESET; + prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control & ~INFINIPATH_C_LINKENABLE); + ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); + ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val); + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control); +} + + +static int ipath_ht_get_ib_cfg(struct ipath_devdata *dd, int which) +{ + int ret; + + switch (which) { + case IPATH_IB_CFG_LWID: + ret = dd->ipath_link_width_active; + break; + case IPATH_IB_CFG_SPD: + ret = dd->ipath_link_speed_active; + break; + case IPATH_IB_CFG_LWID_ENB: + ret = dd->ipath_link_width_enabled; + break; + case IPATH_IB_CFG_SPD_ENB: + ret = dd->ipath_link_speed_enabled; + break; + default: + ret = -ENOTSUPP; + break; + } + return ret; +} + + +/* we assume range checking is already done, if needed */ +static int ipath_ht_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val) +{ + int ret = 0; + + if (which == IPATH_IB_CFG_LWID_ENB) + dd->ipath_link_width_enabled = val; + else if (which == IPATH_IB_CFG_SPD_ENB) + dd->ipath_link_speed_enabled = val; + else + ret = -ENOTSUPP; + return ret; +} + + +static void ipath_ht_config_jint(struct ipath_devdata *dd, u16 a, u16 b) +{ +} + + +static int ipath_ht_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) +{ + ipath_setup_ht_setextled(dd, ipath_ib_linkstate(dd, ibcs), + ipath_ib_linktrstate(dd, ibcs)); + return 0; +} + + +/** + * ipath_init_iba6110_funcs - set up the chip-specific function pointers + * @dd: the infinipath device + * + * This is global, and is called directly at init to set up the + * chip-specific function pointers for later use. + */ +void ipath_init_iba6110_funcs(struct ipath_devdata *dd) +{ + dd->ipath_f_intrsetup = ipath_ht_intconfig; + dd->ipath_f_bus = ipath_setup_ht_config; + dd->ipath_f_reset = ipath_setup_ht_reset; + dd->ipath_f_get_boardname = ipath_ht_boardname; + dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors; + dd->ipath_f_early_init = ipath_ht_early_init; + dd->ipath_f_handle_hwerrors = ipath_ht_handle_hwerrors; + dd->ipath_f_quiet_serdes = ipath_ht_quiet_serdes; + dd->ipath_f_bringup_serdes = ipath_ht_bringup_serdes; + dd->ipath_f_clear_tids = ipath_ht_clear_tids; + dd->ipath_f_put_tid = ipath_ht_put_tid; + dd->ipath_f_cleanup = ipath_setup_ht_cleanup; + dd->ipath_f_setextled = ipath_setup_ht_setextled; + dd->ipath_f_get_base_info = ipath_ht_get_base_info; + dd->ipath_f_free_irq = ipath_ht_free_irq; + dd->ipath_f_tidtemplate = ipath_ht_tidtemplate; + dd->ipath_f_intr_fallback = ipath_ht_nointr_fallback; + dd->ipath_f_get_msgheader = ipath_ht_get_msgheader; + dd->ipath_f_config_ports = ipath_ht_config_ports; + dd->ipath_f_read_counters = ipath_ht_read_counters; + dd->ipath_f_xgxs_reset = ipath_ht_xgxs_reset; + dd->ipath_f_get_ib_cfg = ipath_ht_get_ib_cfg; + dd->ipath_f_set_ib_cfg = ipath_ht_set_ib_cfg; + dd->ipath_f_config_jint = ipath_ht_config_jint; + dd->ipath_f_ib_updown = ipath_ht_ib_updown; + + /* + * initialize chip-specific variables + */ + ipath_init_ht_variables(dd); +} diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c new file mode 100644 index 00000000..49b09c69 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -0,0 +1,1076 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "ipath_kernel.h" +#include "ipath_common.h" + +/* + * min buffers we want to have per port, after driver + */ +#define IPATH_MIN_USER_PORT_BUFCNT 7 + +/* + * Number of ports we are configured to use (to allow for more pio + * buffers per port, etc.) Zero means use chip value. + */ +static ushort ipath_cfgports; + +module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO); +MODULE_PARM_DESC(cfgports, "Set max number of ports to use"); + +/* + * Number of buffers reserved for driver (verbs and layered drivers.) + * Initialized based on number of PIO buffers if not set via module interface. + * The problem with this is that it's global, but we'll use different + * numbers for different chip types. + */ +static ushort ipath_kpiobufs; + +static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp); + +module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_ushort, + &ipath_kpiobufs, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver"); + +/** + * create_port0_egr - allocate the eager TID buffers + * @dd: the infinipath device + * + * This code is now quite different for user and kernel, because + * the kernel uses skb's, for the accelerated network performance. + * This is the kernel (port0) version. + * + * Allocate the eager TID buffers and program them into infinipath. + * We use the network layer alloc_skb() allocator to allocate the + * memory, and either use the buffers as is for things like verbs + * packets, or pass the buffers up to the ipath layered driver and + * thence the network layer, replacing them as we do so (see + * ipath_rcv_layer()). + */ +static int create_port0_egr(struct ipath_devdata *dd) +{ + unsigned e, egrcnt; + struct ipath_skbinfo *skbinfo; + int ret; + + egrcnt = dd->ipath_p0_rcvegrcnt; + + skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt); + if (skbinfo == NULL) { + ipath_dev_err(dd, "allocation error for eager TID " + "skb array\n"); + ret = -ENOMEM; + goto bail; + } + for (e = 0; e < egrcnt; e++) { + /* + * This is a bit tricky in that we allocate extra + * space for 2 bytes of the 14 byte ethernet header. + * These two bytes are passed in the ipath header so + * the rest of the data is word aligned. We allocate + * 4 bytes so that the data buffer stays word aligned. + * See ipath_kreceive() for more details. + */ + skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL); + if (!skbinfo[e].skb) { + ipath_dev_err(dd, "SKB allocation error for " + "eager TID %u\n", e); + while (e != 0) + dev_kfree_skb(skbinfo[--e].skb); + vfree(skbinfo); + ret = -ENOMEM; + goto bail; + } + } + /* + * After loop above, so we can test non-NULL to see if ready + * to use at receive, etc. + */ + dd->ipath_port0_skbinfo = skbinfo; + + for (e = 0; e < egrcnt; e++) { + dd->ipath_port0_skbinfo[e].phys = + ipath_map_single(dd->pcidev, + dd->ipath_port0_skbinfo[e].skb->data, + dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE); + dd->ipath_f_put_tid(dd, e + (u64 __iomem *) + ((char __iomem *) dd->ipath_kregbase + + dd->ipath_rcvegrbase), + RCVHQ_RCV_TYPE_EAGER, + dd->ipath_port0_skbinfo[e].phys); + } + + ret = 0; + +bail: + return ret; +} + +static int bringup_link(struct ipath_devdata *dd) +{ + u64 val, ibc; + int ret = 0; + + /* hold IBC in reset */ + dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control); + + /* + * set initial max size pkt IBC will send, including ICRC; it's the + * PIO buffer size in dwords, less 1; also see ipath_set_mtu() + */ + val = (dd->ipath_ibmaxlen >> 2) + 1; + ibc = val << dd->ibcc_mpl_shift; + + /* flowcontrolwatermark is in units of KBytes */ + ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT; + /* + * How often flowctrl sent. More or less in usecs; balance against + * watermark value, so that in theory senders always get a flow + * control update in time to not let the IB link go idle. + */ + ibc |= 0x3ULL << INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT; + /* max error tolerance */ + ibc |= 0xfULL << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT; + /* use "real" buffer space for */ + ibc |= 4ULL << INFINIPATH_IBCC_CREDITSCALE_SHIFT; + /* IB credit flow control. */ + ibc |= 0xfULL << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT; + /* initially come up waiting for TS1, without sending anything. */ + dd->ipath_ibcctrl = ibc; + /* + * Want to start out with both LINKCMD and LINKINITCMD in NOP + * (0 and 0). Don't put linkinitcmd in ipath_ibcctrl, want that + * to stay a NOP. Flag that we are disabled, for the (unlikely) + * case that some recovery path is trying to bring the link up + * before we are ready. + */ + ibc |= INFINIPATH_IBCC_LINKINITCMD_DISABLE << + INFINIPATH_IBCC_LINKINITCMD_SHIFT; + dd->ipath_flags |= IPATH_IB_LINK_DISABLED; + ipath_cdbg(VERBOSE, "Writing 0x%llx to ibcctrl\n", + (unsigned long long) ibc); + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ibc); + + // be sure chip saw it + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + + ret = dd->ipath_f_bringup_serdes(dd); + + if (ret) + dev_info(&dd->pcidev->dev, "Could not initialize SerDes, " + "not usable\n"); + else { + /* enable IBC */ + dd->ipath_control |= INFINIPATH_C_LINKENABLE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control); + } + + return ret; +} + +static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd) +{ + struct ipath_portdata *pd = NULL; + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (pd) { + pd->port_dd = dd; + pd->port_cnt = 1; + /* The port 0 pkey table is used by the layer interface. */ + pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY; + pd->port_seq_cnt = 1; + } + return pd; +} + +static int init_chip_first(struct ipath_devdata *dd) +{ + struct ipath_portdata *pd; + int ret = 0; + u64 val; + + spin_lock_init(&dd->ipath_kernel_tid_lock); + spin_lock_init(&dd->ipath_user_tid_lock); + spin_lock_init(&dd->ipath_sendctrl_lock); + spin_lock_init(&dd->ipath_uctxt_lock); + spin_lock_init(&dd->ipath_sdma_lock); + spin_lock_init(&dd->ipath_gpio_lock); + spin_lock_init(&dd->ipath_eep_st_lock); + spin_lock_init(&dd->ipath_sdepb_lock); + mutex_init(&dd->ipath_eep_lock); + + /* + * skip cfgports stuff because we are not allocating memory, + * and we don't want problems if the portcnt changed due to + * cfgports. We do still check and report a difference, if + * not same (should be impossible). + */ + dd->ipath_f_config_ports(dd, ipath_cfgports); + if (!ipath_cfgports) + dd->ipath_cfgports = dd->ipath_portcnt; + else if (ipath_cfgports <= dd->ipath_portcnt) { + dd->ipath_cfgports = ipath_cfgports; + ipath_dbg("Configured to use %u ports out of %u in chip\n", + dd->ipath_cfgports, ipath_read_kreg32(dd, + dd->ipath_kregs->kr_portcnt)); + } else { + dd->ipath_cfgports = dd->ipath_portcnt; + ipath_dbg("Tried to configured to use %u ports; chip " + "only supports %u\n", ipath_cfgports, + ipath_read_kreg32(dd, + dd->ipath_kregs->kr_portcnt)); + } + /* + * Allocate full portcnt array, rather than just cfgports, because + * cleanup iterates across all possible ports. + */ + dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_portcnt, + GFP_KERNEL); + + if (!dd->ipath_pd) { + ipath_dev_err(dd, "Unable to allocate portdata array, " + "failing\n"); + ret = -ENOMEM; + goto done; + } + + pd = create_portdata0(dd); + if (!pd) { + ipath_dev_err(dd, "Unable to allocate portdata for port " + "0, failing\n"); + ret = -ENOMEM; + goto done; + } + dd->ipath_pd[0] = pd; + + dd->ipath_rcvtidcnt = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt); + dd->ipath_rcvtidbase = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase); + dd->ipath_rcvegrcnt = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); + dd->ipath_rcvegrbase = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase); + dd->ipath_palign = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign); + dd->ipath_piobufbase = + ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufbase); + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize); + dd->ipath_piosize2k = val & ~0U; + dd->ipath_piosize4k = val >> 32; + if (dd->ipath_piosize4k == 0 && ipath_mtu4096) + ipath_mtu4096 = 0; /* 4KB not supported by this chip */ + dd->ipath_ibmtu = ipath_mtu4096 ? 4096 : 2048; + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt); + dd->ipath_piobcnt2k = val & ~0U; + dd->ipath_piobcnt4k = val >> 32; + dd->ipath_pio2kbase = + (u32 __iomem *) (((char __iomem *) dd->ipath_kregbase) + + (dd->ipath_piobufbase & 0xffffffff)); + if (dd->ipath_piobcnt4k) { + dd->ipath_pio4kbase = (u32 __iomem *) + (((char __iomem *) dd->ipath_kregbase) + + (dd->ipath_piobufbase >> 32)); + /* + * 4K buffers take 2 pages; we use roundup just to be + * paranoid; we calculate it once here, rather than on + * ever buf allocate + */ + dd->ipath_4kalign = ALIGN(dd->ipath_piosize4k, + dd->ipath_palign); + ipath_dbg("%u 2k(%x) piobufs @ %p, %u 4k(%x) @ %p " + "(%x aligned)\n", + dd->ipath_piobcnt2k, dd->ipath_piosize2k, + dd->ipath_pio2kbase, dd->ipath_piobcnt4k, + dd->ipath_piosize4k, dd->ipath_pio4kbase, + dd->ipath_4kalign); + } + else ipath_dbg("%u 2k piobufs @ %p\n", + dd->ipath_piobcnt2k, dd->ipath_pio2kbase); + +done: + return ret; +} + +/** + * init_chip_reset - re-initialize after a reset, or enable + * @dd: the infinipath device + * + * sanity check at least some of the values after reset, and + * ensure no receive or transmit (explicitly, in case reset + * failed + */ +static int init_chip_reset(struct ipath_devdata *dd) +{ + u32 rtmp; + int i; + unsigned long flags; + + /* + * ensure chip does no sends or receives, tail updates, or + * pioavail updates while we re-initialize + */ + dd->ipath_rcvctrl &= ~(1ULL << dd->ipath_r_tailupd_shift); + for (i = 0; i < dd->ipath_portcnt; i++) { + clear_bit(dd->ipath_r_portenable_shift + i, + &dd->ipath_rcvctrl); + clear_bit(dd->ipath_r_intravail_shift + i, + &dd->ipath_rcvctrl); + } + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl); + + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl = 0U; /* no sdma, etc */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL); + + rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt); + if (rtmp != dd->ipath_rcvtidcnt) + dev_info(&dd->pcidev->dev, "tidcnt was %u before " + "reset, now %u, using original\n", + dd->ipath_rcvtidcnt, rtmp); + rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase); + if (rtmp != dd->ipath_rcvtidbase) + dev_info(&dd->pcidev->dev, "tidbase was %u before " + "reset, now %u, using original\n", + dd->ipath_rcvtidbase, rtmp); + rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); + if (rtmp != dd->ipath_rcvegrcnt) + dev_info(&dd->pcidev->dev, "egrcnt was %u before " + "reset, now %u, using original\n", + dd->ipath_rcvegrcnt, rtmp); + rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase); + if (rtmp != dd->ipath_rcvegrbase) + dev_info(&dd->pcidev->dev, "egrbase was %u before " + "reset, now %u, using original\n", + dd->ipath_rcvegrbase, rtmp); + + return 0; +} + +static int init_pioavailregs(struct ipath_devdata *dd) +{ + int ret; + + dd->ipath_pioavailregs_dma = dma_alloc_coherent( + &dd->pcidev->dev, PAGE_SIZE, &dd->ipath_pioavailregs_phys, + GFP_KERNEL); + if (!dd->ipath_pioavailregs_dma) { + ipath_dev_err(dd, "failed to allocate PIOavail reg area " + "in memory\n"); + ret = -ENOMEM; + goto done; + } + + /* + * we really want L2 cache aligned, but for current CPUs of + * interest, they are the same. + */ + dd->ipath_statusp = (u64 *) + ((char *)dd->ipath_pioavailregs_dma + + ((2 * L1_CACHE_BYTES + + dd->ipath_pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES)); + /* copy the current value now that it's really allocated */ + *dd->ipath_statusp = dd->_ipath_status; + /* + * setup buffer to hold freeze msg, accessible to apps, + * following statusp + */ + dd->ipath_freezemsg = (char *)&dd->ipath_statusp[1]; + /* and its length */ + dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]); + + ret = 0; + +done: + return ret; +} + +/** + * init_shadow_tids - allocate the shadow TID array + * @dd: the infinipath device + * + * allocate the shadow TID array, so we can ipath_munlock previous + * entries. It may make more sense to move the pageshadow to the + * port data structure, so we only allocate memory for ports actually + * in use, since we at 8k per port, now. + */ +static void init_shadow_tids(struct ipath_devdata *dd) +{ + struct page **pages; + dma_addr_t *addrs; + + pages = vzalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * + sizeof(struct page *)); + if (!pages) { + ipath_dev_err(dd, "failed to allocate shadow page * " + "array, no expected sends!\n"); + dd->ipath_pageshadow = NULL; + return; + } + + addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * + sizeof(dma_addr_t)); + if (!addrs) { + ipath_dev_err(dd, "failed to allocate shadow dma handle " + "array, no expected sends!\n"); + vfree(pages); + dd->ipath_pageshadow = NULL; + return; + } + + dd->ipath_pageshadow = pages; + dd->ipath_physshadow = addrs; +} + +static void enable_chip(struct ipath_devdata *dd, int reinit) +{ + u32 val; + u64 rcvmask; + unsigned long flags; + int i; + + if (!reinit) + init_waitqueue_head(&ipath_state_wait); + + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl); + + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + /* Enable PIO send, and update of PIOavail regs to memory. */ + dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE | + INFINIPATH_S_PIOBUFAVAILUPD; + + /* + * Set the PIO avail update threshold to host memory + * on chips that support it. + */ + if (dd->ipath_pioupd_thresh) + dd->ipath_sendctrl |= dd->ipath_pioupd_thresh + << INFINIPATH_S_UPDTHRESH_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + + /* + * Enable kernel ports' receive and receive interrupt. + * Other ports done as user opens and inits them. + */ + rcvmask = 1ULL; + dd->ipath_rcvctrl |= (rcvmask << dd->ipath_r_portenable_shift) | + (rcvmask << dd->ipath_r_intravail_shift); + if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) + dd->ipath_rcvctrl |= (1ULL << dd->ipath_r_tailupd_shift); + + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl); + + /* + * now ready for use. this should be cleared whenever we + * detect a reset, or initiate one. + */ + dd->ipath_flags |= IPATH_INITTED; + + /* + * Init our shadow copies of head from tail values, + * and write head values to match. + */ + val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0); + ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0); + + /* Initialize so we interrupt on next packet received */ + ipath_write_ureg(dd, ur_rcvhdrhead, + dd->ipath_rhdrhead_intr_off | + dd->ipath_pd[0]->port_head, 0); + + /* + * by now pioavail updates to memory should have occurred, so + * copy them into our working/shadow registers; this is in + * case something went wrong with abort, but mostly to get the + * initial values of the generation bit correct. + */ + for (i = 0; i < dd->ipath_pioavregs; i++) { + __le64 pioavail; + + /* + * Chip Errata bug 6641; even and odd qwords>3 are swapped. + */ + if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) + pioavail = dd->ipath_pioavailregs_dma[i ^ 1]; + else + pioavail = dd->ipath_pioavailregs_dma[i]; + /* + * don't need to worry about ipath_pioavailkernel here + * because we will call ipath_chg_pioavailkernel() later + * in initialization, to busy out buffers as needed + */ + dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail); + } + /* can get counters, stats, etc. */ + dd->ipath_flags |= IPATH_PRESENT; +} + +static int init_housekeeping(struct ipath_devdata *dd, int reinit) +{ + char boardn[40]; + int ret = 0; + + /* + * have to clear shadow copies of registers at init that are + * not otherwise set here, or all kinds of bizarre things + * happen with driver on chip reset + */ + dd->ipath_rcvhdrsize = 0; + + /* + * Don't clear ipath_flags as 8bit mode was set before + * entering this func. However, we do set the linkstate to + * unknown, so we can watch for a transition. + * PRESENT is set because we want register reads to work, + * and the kernel infrastructure saw it in config space; + * We clear it if we have failures. + */ + dd->ipath_flags |= IPATH_LINKUNK | IPATH_PRESENT; + dd->ipath_flags &= ~(IPATH_LINKACTIVE | IPATH_LINKARMED | + IPATH_LINKDOWN | IPATH_LINKINIT); + + ipath_cdbg(VERBOSE, "Try to read spc chip revision\n"); + dd->ipath_revision = + ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision); + + /* + * set up fundamental info we need to use the chip; we assume + * if the revision reg and these regs are OK, we don't need to + * special case the rest + */ + dd->ipath_sregbase = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_sendregbase); + dd->ipath_cregbase = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_counterregbase); + dd->ipath_uregbase = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_userregbase); + ipath_cdbg(VERBOSE, "ipath_kregbase %p, sendbase %x usrbase %x, " + "cntrbase %x\n", dd->ipath_kregbase, dd->ipath_sregbase, + dd->ipath_uregbase, dd->ipath_cregbase); + if ((dd->ipath_revision & 0xffffffff) == 0xffffffff + || (dd->ipath_sregbase & 0xffffffff) == 0xffffffff + || (dd->ipath_cregbase & 0xffffffff) == 0xffffffff + || (dd->ipath_uregbase & 0xffffffff) == 0xffffffff) { + ipath_dev_err(dd, "Register read failures from chip, " + "giving up initialization\n"); + dd->ipath_flags &= ~IPATH_PRESENT; + ret = -ENODEV; + goto done; + } + + + /* clear diagctrl register, in case diags were running and crashed */ + ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0); + + /* clear the initial reset flag, in case first driver load */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, + INFINIPATH_E_RESET); + + ipath_cdbg(VERBOSE, "Revision %llx (PCI %x)\n", + (unsigned long long) dd->ipath_revision, + dd->ipath_pcirev); + + if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) & + INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) { + ipath_dev_err(dd, "Driver only handles version %d, " + "chip swversion is %d (%llx), failng\n", + IPATH_CHIP_SWVERSION, + (int)(dd->ipath_revision >> + INFINIPATH_R_SOFTWARE_SHIFT) & + INFINIPATH_R_SOFTWARE_MASK, + (unsigned long long) dd->ipath_revision); + ret = -ENOSYS; + goto done; + } + dd->ipath_majrev = (u8) ((dd->ipath_revision >> + INFINIPATH_R_CHIPREVMAJOR_SHIFT) & + INFINIPATH_R_CHIPREVMAJOR_MASK); + dd->ipath_minrev = (u8) ((dd->ipath_revision >> + INFINIPATH_R_CHIPREVMINOR_SHIFT) & + INFINIPATH_R_CHIPREVMINOR_MASK); + dd->ipath_boardrev = (u8) ((dd->ipath_revision >> + INFINIPATH_R_BOARDID_SHIFT) & + INFINIPATH_R_BOARDID_MASK); + + ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn); + + snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion), + "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, " + "SW Compat %u\n", + IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn, + (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) & + INFINIPATH_R_ARCH_MASK, + dd->ipath_majrev, dd->ipath_minrev, dd->ipath_pcirev, + (unsigned)(dd->ipath_revision >> + INFINIPATH_R_SOFTWARE_SHIFT) & + INFINIPATH_R_SOFTWARE_MASK); + + ipath_dbg("%s", dd->ipath_boardversion); + + if (ret) + goto done; + + if (reinit) + ret = init_chip_reset(dd); + else + ret = init_chip_first(dd); + +done: + return ret; +} + +static void verify_interrupt(unsigned long opaque) +{ + struct ipath_devdata *dd = (struct ipath_devdata *) opaque; + + if (!dd) + return; /* being torn down */ + + /* + * If we don't have any interrupts, let the user know and + * don't bother checking again. + */ + if (dd->ipath_int_counter == 0) { + if (!dd->ipath_f_intr_fallback(dd)) + dev_err(&dd->pcidev->dev, "No interrupts detected, " + "not usable.\n"); + else /* re-arm the timer to see if fallback works */ + mod_timer(&dd->ipath_intrchk_timer, jiffies + HZ/2); + } else + ipath_cdbg(VERBOSE, "%u interrupts at timer check\n", + dd->ipath_int_counter); +} + +/** + * ipath_init_chip - do the actual initialization sequence on the chip + * @dd: the infinipath device + * @reinit: reinitializing, so don't allocate new memory + * + * Do the actual initialization sequence on the chip. This is done + * both from the init routine called from the PCI infrastructure, and + * when we reset the chip, or detect that it was reset internally, + * or it's administratively re-enabled. + * + * Memory allocation here and in called routines is only done in + * the first case (reinit == 0). We have to be careful, because even + * without memory allocation, we need to re-write all the chip registers + * TIDs, etc. after the reset or enable has completed. + */ +int ipath_init_chip(struct ipath_devdata *dd, int reinit) +{ + int ret = 0; + u32 kpiobufs, defkbufs; + u32 piobufs, uports; + u64 val; + struct ipath_portdata *pd; + gfp_t gfp_flags = GFP_USER | __GFP_COMP; + + ret = init_housekeeping(dd, reinit); + if (ret) + goto done; + + /* + * we ignore most issues after reporting them, but have to specially + * handle hardware-disabled chips. + */ + if (ret == 2) { + /* unique error, known to ipath_init_one */ + ret = -EPERM; + goto done; + } + + /* + * We could bump this to allow for full rcvegrcnt + rcvtidcnt, + * but then it no longer nicely fits power of two, and since + * we now use routines that backend onto __get_free_pages, the + * rest would be wasted. + */ + dd->ipath_rcvhdrcnt = max(dd->ipath_p0_rcvegrcnt, dd->ipath_rcvegrcnt); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrcnt, + dd->ipath_rcvhdrcnt); + + /* + * Set up the shadow copies of the piobufavail registers, + * which we compare against the chip registers for now, and + * the in memory DMA'ed copies of the registers. This has to + * be done early, before we calculate lastport, etc. + */ + piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; + /* + * calc number of pioavail registers, and save it; we have 2 + * bits per buffer. + */ + dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) + / (sizeof(u64) * BITS_PER_BYTE / 2); + uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0; + if (piobufs > 144) + defkbufs = 32 + dd->ipath_pioreserved; + else + defkbufs = 16 + dd->ipath_pioreserved; + + if (ipath_kpiobufs && (ipath_kpiobufs + + (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) { + int i = (int) piobufs - + (int) (uports * IPATH_MIN_USER_PORT_BUFCNT); + if (i < 1) + i = 1; + dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of " + "%d for kernel leaves too few for %d user ports " + "(%d each); using %u\n", ipath_kpiobufs, + piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i); + /* + * shouldn't change ipath_kpiobufs, because could be + * different for different devices... + */ + kpiobufs = i; + } else if (ipath_kpiobufs) + kpiobufs = ipath_kpiobufs; + else + kpiobufs = defkbufs; + dd->ipath_lastport_piobuf = piobufs - kpiobufs; + dd->ipath_pbufsport = + uports ? dd->ipath_lastport_piobuf / uports : 0; + /* if not an even divisor, some user ports get extra buffers */ + dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf - + (dd->ipath_pbufsport * uports); + if (dd->ipath_ports_extrabuf) + ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to " + "ports <= %u\n", dd->ipath_pbufsport, + dd->ipath_ports_extrabuf); + dd->ipath_lastpioindex = 0; + dd->ipath_lastpioindexl = dd->ipath_piobcnt2k; + /* ipath_pioavailshadow initialized earlier */ + ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " + "each for %u user ports\n", kpiobufs, + piobufs, dd->ipath_pbufsport, uports); + ret = dd->ipath_f_early_init(dd); + if (ret) { + ipath_dev_err(dd, "Early initialization failure\n"); + goto done; + } + + /* + * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be + * done after early_init. + */ + dd->ipath_hdrqlast = + dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrentsize, + dd->ipath_rcvhdrentsize); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize, + dd->ipath_rcvhdrsize); + + if (!reinit) { + ret = init_pioavailregs(dd); + init_shadow_tids(dd); + if (ret) + goto done; + } + + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr, + dd->ipath_pioavailregs_phys); + + /* + * this is to detect s/w errors, which the h/w works around by + * ignoring the low 6 bits of address, if it wasn't aligned. + */ + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpioavailaddr); + if (val != dd->ipath_pioavailregs_phys) { + ipath_dev_err(dd, "Catastrophic software error, " + "SendPIOAvailAddr written as %lx, " + "read back as %llx\n", + (unsigned long) dd->ipath_pioavailregs_phys, + (unsigned long long) val); + ret = -EINVAL; + goto done; + } + + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP); + + /* + * make sure we are not in freeze, and PIO send enabled, so + * writes to pbc happen + */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, 0ULL); + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, + ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED); + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL); + + /* + * before error clears, since we expect serdes pll errors during + * this, the first time after reset + */ + if (bringup_link(dd)) { + dev_info(&dd->pcidev->dev, "Failed to bringup IB link\n"); + ret = -ENETDOWN; + goto done; + } + + /* + * clear any "expected" hwerrs from reset and/or initialization + * clear any that aren't enabled (at least this once), and then + * set the enable mask + */ + dd->ipath_f_init_hwerrors(dd); + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, + ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED); + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, + dd->ipath_hwerrmask); + + /* clear all */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); + /* enable errors that are masked, at least this first time. */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, + ~dd->ipath_maskederrs); + dd->ipath_maskederrs = 0; /* don't re-enable ignored in timer */ + dd->ipath_errormask = + ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask); + /* clear any interrupts up to this point (ints still not enabled) */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); + + dd->ipath_f_tidtemplate(dd); + + /* + * Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing + * re-init, the simplest way to handle this is to free + * existing, and re-allocate. + * Need to re-create rest of port 0 portdata as well. + */ + pd = dd->ipath_pd[0]; + if (reinit) { + struct ipath_portdata *npd; + + /* + * Alloc and init new ipath_portdata for port0, + * Then free old pd. Could lead to fragmentation, but also + * makes later support for hot-swap easier. + */ + npd = create_portdata0(dd); + if (npd) { + ipath_free_pddata(dd, pd); + dd->ipath_pd[0] = npd; + pd = npd; + } else { + ipath_dev_err(dd, "Unable to allocate portdata" + " for port 0, failing\n"); + ret = -ENOMEM; + goto done; + } + } + ret = ipath_create_rcvhdrq(dd, pd); + if (!ret) + ret = create_port0_egr(dd); + if (ret) { + ipath_dev_err(dd, "failed to allocate kernel port's " + "rcvhdrq and/or egr bufs\n"); + goto done; + } + else + enable_chip(dd, reinit); + + /* after enable_chip, so pioavailshadow setup */ + ipath_chg_pioavailkernel(dd, 0, piobufs, 1); + + /* + * Cancel any possible active sends from early driver load. + * Follows early_init because some chips have to initialize + * PIO buffers in early_init to avoid false parity errors. + * After enable and ipath_chg_pioavailkernel so we can safely + * enable pioavail updates and PIOENABLE; packets are now + * ready to go out. + */ + ipath_cancel_sends(dd, 1); + + if (!reinit) { + /* + * Used when we close a port, for DMA already in flight + * at close. + */ + dd->ipath_dummy_hdrq = dma_alloc_coherent( + &dd->pcidev->dev, dd->ipath_pd[0]->port_rcvhdrq_size, + &dd->ipath_dummy_hdrq_phys, + gfp_flags); + if (!dd->ipath_dummy_hdrq) { + dev_info(&dd->pcidev->dev, + "Couldn't allocate 0x%lx bytes for dummy hdrq\n", + dd->ipath_pd[0]->port_rcvhdrq_size); + /* fallback to just 0'ing */ + dd->ipath_dummy_hdrq_phys = 0UL; + } + } + + /* + * cause retrigger of pending interrupts ignored during init, + * even if we had errors + */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); + + if (!dd->ipath_stats_timer_active) { + /* + * first init, or after an admin disable/enable + * set up stats retrieval timer, even if we had errors + * in last portion of setup + */ + init_timer(&dd->ipath_stats_timer); + dd->ipath_stats_timer.function = ipath_get_faststats; + dd->ipath_stats_timer.data = (unsigned long) dd; + /* every 5 seconds; */ + dd->ipath_stats_timer.expires = jiffies + 5 * HZ; + /* takes ~16 seconds to overflow at full IB 4x bandwdith */ + add_timer(&dd->ipath_stats_timer); + dd->ipath_stats_timer_active = 1; + } + + /* Set up SendDMA if chip supports it */ + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + ret = setup_sdma(dd); + + /* Set up HoL state */ + init_timer(&dd->ipath_hol_timer); + dd->ipath_hol_timer.function = ipath_hol_event; + dd->ipath_hol_timer.data = (unsigned long)dd; + dd->ipath_hol_state = IPATH_HOL_UP; + +done: + if (!ret) { + *dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT; + if (!dd->ipath_f_intrsetup(dd)) { + /* now we can enable all interrupts from the chip */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, + -1LL); + /* force re-interrupt of any pending interrupts. */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, + 0ULL); + /* chip is usable; mark it as initialized */ + *dd->ipath_statusp |= IPATH_STATUS_INITTED; + + /* + * setup to verify we get an interrupt, and fallback + * to an alternate if necessary and possible + */ + if (!reinit) { + init_timer(&dd->ipath_intrchk_timer); + dd->ipath_intrchk_timer.function = + verify_interrupt; + dd->ipath_intrchk_timer.data = + (unsigned long) dd; + } + dd->ipath_intrchk_timer.expires = jiffies + HZ/2; + add_timer(&dd->ipath_intrchk_timer); + } else + ipath_dev_err(dd, "No interrupts enabled, couldn't " + "setup interrupt address\n"); + + if (dd->ipath_cfgports > ipath_stats.sps_nports) + /* + * sps_nports is a global, so, we set it to + * the highest number of ports of any of the + * chips we find; we never decrement it, at + * least for now. Since this might have changed + * over disable/enable or prior to reset, always + * do the check and potentially adjust. + */ + ipath_stats.sps_nports = dd->ipath_cfgports; + } else + ipath_dbg("Failed (%d) to initialize chip\n", ret); + + /* if ret is non-zero, we probably should do some cleanup + here... */ + return ret; +} + +static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp) +{ + struct ipath_devdata *dd; + unsigned long flags; + unsigned short val; + int ret; + + ret = ipath_parse_ushort(str, &val); + + spin_lock_irqsave(&ipath_devs_lock, flags); + + if (ret < 0) + goto bail; + + if (val == 0) { + ret = -EINVAL; + goto bail; + } + + list_for_each_entry(dd, &ipath_dev_list, ipath_list) { + if (dd->ipath_kregbase) + continue; + if (val > (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - + (dd->ipath_cfgports * + IPATH_MIN_USER_PORT_BUFCNT))) + { + ipath_dev_err( + dd, + "Allocating %d PIO bufs for kernel leaves " + "too few for %d user ports (%d each)\n", + val, dd->ipath_cfgports - 1, + IPATH_MIN_USER_PORT_BUFCNT); + ret = -EINVAL; + goto bail; + } + dd->ipath_lastport_piobuf = + dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val; + } + + ipath_kpiobufs = val; + ret = 0; +bail: + spin_unlock_irqrestore(&ipath_devs_lock, flags); + + return ret; +} diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c new file mode 100644 index 00000000..c0a03ac0 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -0,0 +1,1274 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "ipath_kernel.h" +#include "ipath_verbs.h" +#include "ipath_common.h" + + +/* + * Called when we might have an error that is specific to a particular + * PIO buffer, and may need to cancel that buffer, so it can be re-used. + */ +void ipath_disarm_senderrbufs(struct ipath_devdata *dd) +{ + u32 piobcnt; + unsigned long sbuf[4]; + /* + * it's possible that sendbuffererror could have bits set; might + * have already done this as a result of hardware error handling + */ + piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; + /* read these before writing errorclear */ + sbuf[0] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror); + sbuf[1] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror + 1); + if (piobcnt > 128) + sbuf[2] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror + 2); + if (piobcnt > 192) + sbuf[3] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror + 3); + else + sbuf[3] = 0; + + if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { + int i; + if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) && + dd->ipath_lastcancel > jiffies) { + __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG, + "SendbufErrs %lx %lx", sbuf[0], + sbuf[1]); + if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128) + printk(" %lx %lx ", sbuf[2], sbuf[3]); + printk("\n"); + } + + for (i = 0; i < piobcnt; i++) + if (test_bit(i, sbuf)) + ipath_disarm_piobufs(dd, i, 1); + /* ignore armlaunch errs for a bit */ + dd->ipath_lastcancel = jiffies+3; + } +} + + +/* These are all rcv-related errors which we want to count for stats */ +#define E_SUM_PKTERRS \ + (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \ + INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \ + INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \ + INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \ + INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \ + INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP) + +/* These are all send-related errors which we want to count for stats */ +#define E_SUM_ERRS \ + (INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \ + INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \ + INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \ + INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \ + INFINIPATH_E_INVALIDADDR) + +/* + * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore + * errors not related to freeze and cancelling buffers. Can't ignore + * armlaunch because could get more while still cleaning up, and need + * to cancel those as they happen. + */ +#define E_SPKT_ERRS_IGNORE \ + (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \ + INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \ + INFINIPATH_E_SPKTLEN) + +/* + * these are errors that can occur when the link changes state while + * a packet is being sent or received. This doesn't cover things + * like EBP or VCRC that can be the result of a sending having the + * link change state, so we receive a "known bad" packet. + */ +#define E_SUM_LINK_PKTERRS \ + (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \ + INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \ + INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RMINPKTLEN | \ + INFINIPATH_E_RUNEXPCHAR) + +static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) +{ + u64 ignore_this_time = 0; + + ipath_disarm_senderrbufs(dd); + if ((errs & E_SUM_LINK_PKTERRS) && + !(dd->ipath_flags & IPATH_LINKACTIVE)) { + /* + * This can happen when SMA is trying to bring the link + * up, but the IB link changes state at the "wrong" time. + * The IB logic then complains that the packet isn't + * valid. We don't want to confuse people, so we just + * don't print them, except at debug + */ + ipath_dbg("Ignoring packet errors %llx, because link not " + "ACTIVE\n", (unsigned long long) errs); + ignore_this_time = errs & E_SUM_LINK_PKTERRS; + } + + return ignore_this_time; +} + +/* generic hw error messages... */ +#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \ + { \ + .mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a << \ + INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ), \ + .msg = "TXE " #a " Memory Parity" \ + } +#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \ + { \ + .mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a << \ + INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ), \ + .msg = "RXE " #a " Memory Parity" \ + } + +static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = { + INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"), + INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"), + + INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF), + INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC), + INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO), + + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO), +}; + +/** + * ipath_format_hwmsg - format a single hwerror message + * @msg message buffer + * @msgl length of message buffer + * @hwmsg message to add to message buffer + */ +static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) +{ + strlcat(msg, "[", msgl); + strlcat(msg, hwmsg, msgl); + strlcat(msg, "]", msgl); +} + +/** + * ipath_format_hwerrors - format hardware error messages for display + * @hwerrs hardware errors bit vector + * @hwerrmsgs hardware error descriptions + * @nhwerrmsgs number of hwerrmsgs + * @msg message buffer + * @msgl message buffer length + */ +void ipath_format_hwerrors(u64 hwerrs, + const struct ipath_hwerror_msgs *hwerrmsgs, + size_t nhwerrmsgs, + char *msg, size_t msgl) +{ + int i; + const int glen = + sizeof(ipath_generic_hwerror_msgs) / + sizeof(ipath_generic_hwerror_msgs[0]); + + for (i=0; iib_init) + ret = "Init"; + else if (state == dd->ib_arm) + ret = "Arm"; + else if (state == dd->ib_active) + ret = "Active"; + else + ret = "Down"; + return ret; +} + +void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev) +{ + struct ib_event event; + + event.device = &dd->verbs_dev->ibdev; + event.element.port_num = 1; + event.event = ev; + ib_dispatch_event(&event); +} + +static void handle_e_ibstatuschanged(struct ipath_devdata *dd, + ipath_err_t errs) +{ + u32 ltstate, lstate, ibstate, lastlstate; + u32 init = dd->ib_init; + u32 arm = dd->ib_arm; + u32 active = dd->ib_active; + const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); + + lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */ + ibstate = ipath_ib_state(dd, ibcs); + /* linkstate at last interrupt */ + lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat); + ltstate = ipath_ib_linktrstate(dd, ibcs); /* linktrainingtate */ + + /* + * Since going into a recovery state causes the link state to go + * down and since recovery is transitory, it is better if we "miss" + * ever seeing the link training state go into recovery (i.e., + * ignore this transition for link state special handling purposes) + * without even updating ipath_lastibcstat. + */ + if ((ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN) || + (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT) || + (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERIDLE)) + goto done; + + /* + * if linkstate transitions into INIT from any of the various down + * states, or if it transitions from any of the up (INIT or better) + * states into any of the down states (except link recovery), then + * call the chip-specific code to take appropriate actions. + */ + if (lstate >= INFINIPATH_IBCS_L_STATE_INIT && + lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) { + /* transitioned to UP */ + if (dd->ipath_f_ib_updown(dd, 1, ibcs)) { + /* link came up, so we must no longer be disabled */ + dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED; + ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n"); + goto skip_ibchange; /* chip-code handled */ + } + } else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT || + (dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) && + ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT && + ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) { + int handled; + handled = dd->ipath_f_ib_updown(dd, 0, ibcs); + dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY; + if (handled) { + ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n"); + goto skip_ibchange; /* chip-code handled */ + } + } + + /* + * Significant enough to always print and get into logs, if it was + * unexpected. If it was a requested state change, we'll have + * already cleared the flags, so we won't print this warning + */ + if ((ibstate != arm && ibstate != active) && + (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) { + dev_info(&dd->pcidev->dev, "Link state changed from %s " + "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ? + "ARM" : "ACTIVE", ib_linkstate(dd, ibcs)); + } + + if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE || + ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) { + u32 lastlts; + lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat); + /* + * Ignore cycling back and forth from Polling.Active to + * Polling.Quiet while waiting for the other end of the link + * to come up, except to try and decide if we are connected + * to a live IB device or not. We will cycle back and + * forth between them if no cable is plugged in, the other + * device is powered off or disabled, etc. + */ + if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE || + lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) { + if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) && + (++dd->ipath_ibpollcnt == 40)) { + dd->ipath_flags |= IPATH_NOCABLE; + *dd->ipath_statusp |= + IPATH_STATUS_IB_NOCABLE; + ipath_cdbg(LINKVERB, "Set NOCABLE\n"); + } + ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n", + ipath_ibcstatus_str[ltstate], ibstate); + goto skip_ibchange; + } + } + + dd->ipath_ibpollcnt = 0; /* not poll*, now */ + ipath_stats.sps_iblink++; + + if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) { + u64 linkrecov; + linkrecov = ipath_snap_cntr(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt); + if (linkrecov != dd->ipath_lastlinkrecov) { + ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n", + (unsigned long long) ibcs, + ib_linkstate(dd, ibcs), + ipath_ibcstatus_str[ltstate], + (unsigned long long) linkrecov); + /* and no more until active again */ + dd->ipath_lastlinkrecov = 0; + ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); + goto skip_ibchange; + } + } + + if (ibstate == init || ibstate == arm || ibstate == active) { + *dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE; + if (ibstate == init || ibstate == arm) { + *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; + if (dd->ipath_flags & IPATH_LINKACTIVE) + signal_ib_event(dd, IB_EVENT_PORT_ERR); + } + if (ibstate == arm) { + dd->ipath_flags |= IPATH_LINKARMED; + dd->ipath_flags &= ~(IPATH_LINKUNK | + IPATH_LINKINIT | IPATH_LINKDOWN | + IPATH_LINKACTIVE | IPATH_NOCABLE); + ipath_hol_down(dd); + } else if (ibstate == init) { + /* + * set INIT and DOWN. Down is checked by + * most of the other code, but INIT is + * useful to know in a few places. + */ + dd->ipath_flags |= IPATH_LINKINIT | + IPATH_LINKDOWN; + dd->ipath_flags &= ~(IPATH_LINKUNK | + IPATH_LINKARMED | IPATH_LINKACTIVE | + IPATH_NOCABLE); + ipath_hol_down(dd); + } else { /* active */ + dd->ipath_lastlinkrecov = ipath_snap_cntr(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt); + *dd->ipath_statusp |= + IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF; + dd->ipath_flags |= IPATH_LINKACTIVE; + dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT + | IPATH_LINKDOWN | IPATH_LINKARMED | + IPATH_NOCABLE); + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + ipath_restart_sdma(dd); + signal_ib_event(dd, IB_EVENT_PORT_ACTIVE); + /* LED active not handled in chip _f_updown */ + dd->ipath_f_setextled(dd, lstate, ltstate); + ipath_hol_up(dd); + } + + /* + * print after we've already done the work, so as not to + * delay the state changes and notifications, for debugging + */ + if (lstate == lastlstate) + ipath_cdbg(LINKVERB, "Unchanged from last: %s " + "(%x)\n", ib_linkstate(dd, ibcs), ibstate); + else + ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n", + dd->ipath_unit, ib_linkstate(dd, ibcs), + ipath_ibcstatus_str[ltstate], ibstate); + } else { /* down */ + if (dd->ipath_flags & IPATH_LINKACTIVE) + signal_ib_event(dd, IB_EVENT_PORT_ERR); + dd->ipath_flags |= IPATH_LINKDOWN; + dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT + | IPATH_LINKACTIVE | + IPATH_LINKARMED); + *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; + dd->ipath_lli_counter = 0; + + if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN) + ipath_cdbg(VERBOSE, "Unit %u link state down " + "(state 0x%x), from %s\n", + dd->ipath_unit, lstate, + ib_linkstate(dd, dd->ipath_lastibcstat)); + else + ipath_cdbg(LINKVERB, "Unit %u link state changed " + "to %s (0x%x) from down (%x)\n", + dd->ipath_unit, + ipath_ibcstatus_str[ltstate], + ibstate, lastlstate); + } + +skip_ibchange: + dd->ipath_lastibcstat = ibcs; +done: + return; +} + +static void handle_supp_msgs(struct ipath_devdata *dd, + unsigned supp_msgs, char *msg, u32 msgsz) +{ + /* + * Print the message unless it's ibc status change only, which + * happens so often we never want to count it. + */ + if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) { + int iserr; + ipath_err_t mask; + iserr = ipath_decode_err(dd, msg, msgsz, + dd->ipath_lasterror & + ~INFINIPATH_E_IBSTATUSCHANGED); + + mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | + INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED; + + /* if we're in debug, then don't mask SDMADISABLED msgs */ + if (ipath_debug & __IPATH_DBG) + mask &= ~INFINIPATH_E_SDMADISABLED; + + if (dd->ipath_lasterror & ~mask) + ipath_dev_err(dd, "Suppressed %u messages for " + "fast-repeating errors (%s) (%llx)\n", + supp_msgs, msg, + (unsigned long long) + dd->ipath_lasterror); + else { + /* + * rcvegrfull and rcvhdrqfull are "normal", for some + * types of processes (mostly benchmarks) that send + * huge numbers of messages, while not processing + * them. So only complain about these at debug + * level. + */ + if (iserr) + ipath_dbg("Suppressed %u messages for %s\n", + supp_msgs, msg); + else + ipath_cdbg(ERRPKT, + "Suppressed %u messages for %s\n", + supp_msgs, msg); + } + } +} + +static unsigned handle_frequent_errors(struct ipath_devdata *dd, + ipath_err_t errs, char *msg, + u32 msgsz, int *noprint) +{ + unsigned long nc; + static unsigned long nextmsg_time; + static unsigned nmsgs, supp_msgs; + + /* + * Throttle back "fast" messages to no more than 10 per 5 seconds. + * This isn't perfect, but it's a reasonable heuristic. If we get + * more than 10, give a 6x longer delay. + */ + nc = jiffies; + if (nmsgs > 10) { + if (time_before(nc, nextmsg_time)) { + *noprint = 1; + if (!supp_msgs++) + nextmsg_time = nc + HZ * 3; + } + else if (supp_msgs) { + handle_supp_msgs(dd, supp_msgs, msg, msgsz); + supp_msgs = 0; + nmsgs = 0; + } + } + else if (!nmsgs++ || time_after(nc, nextmsg_time)) + nextmsg_time = nc + HZ / 2; + + return supp_msgs; +} + +static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs) +{ + unsigned long flags; + int expected; + + if (ipath_debug & __IPATH_DBG) { + char msg[128]; + ipath_decode_err(dd, msg, sizeof msg, errs & + INFINIPATH_E_SDMAERRS); + ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg); + } + if (ipath_debug & __IPATH_VERBDBG) { + unsigned long tl, hd, status, lengen; + tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail); + hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead); + status = ipath_read_kreg64(dd + , dd->ipath_kregs->kr_senddmastatus); + lengen = ipath_read_kreg64(dd, + dd->ipath_kregs->kr_senddmalengen); + ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx " + "lengen 0x%lx\n", tl, hd, status, lengen); + } + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); + expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + if (!expected) + ipath_cancel_sends(dd, 1); +} + +static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat) +{ + unsigned long flags; + int expected; + + if ((istat & INFINIPATH_I_SDMAINT) && + !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + ipath_sdma_intr(dd); + + if (istat & INFINIPATH_I_SDMADISABLED) { + expected = test_bit(IPATH_SDMA_ABORTING, + &dd->ipath_sdma_status); + ipath_dbg("%s SDmaDisabled intr\n", + expected ? "expected" : "unexpected"); + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + if (!expected) + ipath_cancel_sends(dd, 1); + if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + tasklet_hi_schedule(&dd->ipath_sdma_abort_task); + } +} + +static int handle_hdrq_full(struct ipath_devdata *dd) +{ + int chkerrpkts = 0; + u32 hd, tl; + u32 i; + + ipath_stats.sps_hdrqfull++; + for (i = 0; i < dd->ipath_cfgports; i++) { + struct ipath_portdata *pd = dd->ipath_pd[i]; + + if (i == 0) { + /* + * For kernel receive queues, we just want to know + * if there are packets in the queue that we can + * process. + */ + if (pd->port_head != ipath_get_hdrqtail(pd)) + chkerrpkts |= 1 << i; + continue; + } + + /* Skip if user context is not open */ + if (!pd || !pd->port_cnt) + continue; + + /* Don't report the same point multiple times. */ + if (dd->ipath_flags & IPATH_NODMA_RTAIL) + tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i); + else + tl = ipath_get_rcvhdrtail(pd); + if (tl == pd->port_lastrcvhdrqtail) + continue; + + hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i); + if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) { + pd->port_lastrcvhdrqtail = tl; + pd->port_hdrqfull++; + /* flush hdrqfull so that poll() sees it */ + wmb(); + wake_up_interruptible(&pd->port_wait); + } + } + + return chkerrpkts; +} + +static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) +{ + char msg[128]; + u64 ignore_this_time = 0; + u64 iserr = 0; + int chkerrpkts = 0, noprint = 0; + unsigned supp_msgs; + int log_idx; + + /* + * don't report errors that are masked, either at init + * (not set in ipath_errormask), or temporarily (set in + * ipath_maskederrs) + */ + errs &= dd->ipath_errormask & ~dd->ipath_maskederrs; + + supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg, + &noprint); + + /* do these first, they are most important */ + if (errs & INFINIPATH_E_HARDWARE) { + /* reuse same msg buf */ + dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg); + } else { + u64 mask; + for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) { + mask = dd->ipath_eep_st_masks[log_idx].errs_to_log; + if (errs & mask) + ipath_inc_eeprom_err(dd, log_idx, 1); + } + } + + if (errs & INFINIPATH_E_SDMAERRS) + handle_sdma_errors(dd, errs); + + if (!noprint && (errs & ~dd->ipath_e_bitsextant)) + ipath_dev_err(dd, "error interrupt with unknown errors " + "%llx set\n", (unsigned long long) + (errs & ~dd->ipath_e_bitsextant)); + + if (errs & E_SUM_ERRS) + ignore_this_time = handle_e_sum_errs(dd, errs); + else if ((errs & E_SUM_LINK_PKTERRS) && + !(dd->ipath_flags & IPATH_LINKACTIVE)) { + /* + * This can happen when SMA is trying to bring the link + * up, but the IB link changes state at the "wrong" time. + * The IB logic then complains that the packet isn't + * valid. We don't want to confuse people, so we just + * don't print them, except at debug + */ + ipath_dbg("Ignoring packet errors %llx, because link not " + "ACTIVE\n", (unsigned long long) errs); + ignore_this_time = errs & E_SUM_LINK_PKTERRS; + } + + if (supp_msgs == 250000) { + int s_iserr; + /* + * It's not entirely reasonable assuming that the errors set + * in the last clear period are all responsible for the + * problem, but the alternative is to assume it's the only + * ones on this particular interrupt, which also isn't great + */ + dd->ipath_maskederrs |= dd->ipath_lasterror | errs; + + dd->ipath_errormask &= ~dd->ipath_maskederrs; + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, + dd->ipath_errormask); + s_iserr = ipath_decode_err(dd, msg, sizeof msg, + dd->ipath_maskederrs); + + if (dd->ipath_maskederrs & + ~(INFINIPATH_E_RRCVEGRFULL | + INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) + ipath_dev_err(dd, "Temporarily disabling " + "error(s) %llx reporting; too frequent (%s)\n", + (unsigned long long) dd->ipath_maskederrs, + msg); + else { + /* + * rcvegrfull and rcvhdrqfull are "normal", + * for some types of processes (mostly benchmarks) + * that send huge numbers of messages, while not + * processing them. So only complain about + * these at debug level. + */ + if (s_iserr) + ipath_dbg("Temporarily disabling reporting " + "too frequent queue full errors (%s)\n", + msg); + else + ipath_cdbg(ERRPKT, + "Temporarily disabling reporting too" + " frequent packet errors (%s)\n", + msg); + } + + /* + * Re-enable the masked errors after around 3 minutes. in + * ipath_get_faststats(). If we have a series of fast + * repeating but different errors, the interval will keep + * stretching out, but that's OK, as that's pretty + * catastrophic. + */ + dd->ipath_unmasktime = jiffies + HZ * 180; + } + + ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs); + if (ignore_this_time) + errs &= ~ignore_this_time; + if (errs & ~dd->ipath_lasterror) { + errs &= ~dd->ipath_lasterror; + /* never suppress duplicate hwerrors or ibstatuschange */ + dd->ipath_lasterror |= errs & + ~(INFINIPATH_E_HARDWARE | + INFINIPATH_E_IBSTATUSCHANGED); + } + + if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) { + dd->ipath_spectriggerhit++; + ipath_dbg("%lu special trigger hits\n", + dd->ipath_spectriggerhit); + } + + /* likely due to cancel; so suppress message unless verbose */ + if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) && + dd->ipath_lastcancel > jiffies) { + /* armlaunch takes precedence; it often causes both. */ + ipath_cdbg(VERBOSE, + "Suppressed %s error (%llx) after sendbuf cancel\n", + (errs & INFINIPATH_E_SPIOARMLAUNCH) ? + "armlaunch" : "sendpktlen", (unsigned long long)errs); + errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN); + } + + if (!errs) + return 0; + + if (!noprint) { + ipath_err_t mask; + /* + * The ones we mask off are handled specially below + * or above. Also mask SDMADISABLED by default as it + * is too chatty. + */ + mask = INFINIPATH_E_IBSTATUSCHANGED | + INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | + INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED; + + /* if we're in debug, then don't mask SDMADISABLED msgs */ + if (ipath_debug & __IPATH_DBG) + mask &= ~INFINIPATH_E_SDMADISABLED; + + ipath_decode_err(dd, msg, sizeof msg, errs & ~mask); + } else + /* so we don't need if (!noprint) at strlcat's below */ + *msg = 0; + + if (errs & E_SUM_PKTERRS) { + ipath_stats.sps_pkterrs++; + chkerrpkts = 1; + } + if (errs & E_SUM_ERRS) + ipath_stats.sps_errs++; + + if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) { + ipath_stats.sps_crcerrs++; + chkerrpkts = 1; + } + iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS); + + + /* + * We don't want to print these two as they happen, or we can make + * the situation even worse, because it takes so long to print + * messages to serial consoles. Kernel ports get printed from + * fast_stats, no more than every 5 seconds, user ports get printed + * on close + */ + if (errs & INFINIPATH_E_RRCVHDRFULL) + chkerrpkts |= handle_hdrq_full(dd); + if (errs & INFINIPATH_E_RRCVEGRFULL) { + struct ipath_portdata *pd = dd->ipath_pd[0]; + + /* + * since this is of less importance and not likely to + * happen without also getting hdrfull, only count + * occurrences; don't check each port (or even the kernel + * vs user) + */ + ipath_stats.sps_etidfull++; + if (pd->port_head != ipath_get_hdrqtail(pd)) + chkerrpkts |= 1; + } + + /* + * do this before IBSTATUSCHANGED, in case both bits set in a single + * interrupt; we want the STATUSCHANGE to "win", so we do our + * internal copy of state machine correctly + */ + if (errs & INFINIPATH_E_RIBLOSTLINK) { + /* + * force through block below + */ + errs |= INFINIPATH_E_IBSTATUSCHANGED; + ipath_stats.sps_iblink++; + dd->ipath_flags |= IPATH_LINKDOWN; + dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT + | IPATH_LINKARMED | IPATH_LINKACTIVE); + *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; + + ipath_dbg("Lost link, link now down (%s)\n", + ipath_ibcstatus_str[ipath_read_kreg64(dd, + dd->ipath_kregs->kr_ibcstatus) & 0xf]); + } + if (errs & INFINIPATH_E_IBSTATUSCHANGED) + handle_e_ibstatuschanged(dd, errs); + + if (errs & INFINIPATH_E_RESET) { + if (!noprint) + ipath_dev_err(dd, "Got reset, requires re-init " + "(unload and reload driver)\n"); + dd->ipath_flags &= ~IPATH_INITTED; /* needs re-init */ + /* mark as having had error */ + *dd->ipath_statusp |= IPATH_STATUS_HWERROR; + *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF; + } + + if (!noprint && *msg) { + if (iserr) + ipath_dev_err(dd, "%s error\n", msg); + } + if (dd->ipath_state_wanted & dd->ipath_flags) { + ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, " + "waking\n", dd->ipath_state_wanted, + dd->ipath_flags); + wake_up_interruptible(&ipath_state_wait); + } + + return chkerrpkts; +} + +/* + * try to cleanup as much as possible for anything that might have gone + * wrong while in freeze mode, such as pio buffers being written by user + * processes (causing armlaunch), send errors due to going into freeze mode, + * etc., and try to avoid causing extra interrupts while doing so. + * Forcibly update the in-memory pioavail register copies after cleanup + * because the chip won't do it while in freeze mode (the register values + * themselves are kept correct). + * Make sure that we don't lose any important interrupts by using the chip + * feature that says that writing 0 to a bit in *clear that is set in + * *status will cause an interrupt to be generated again (if allowed by + * the *mask value). + */ +void ipath_clear_freeze(struct ipath_devdata *dd) +{ + /* disable error interrupts, to avoid confusion */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); + + /* also disable interrupts; errormask is sometimes overwriten */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); + + ipath_cancel_sends(dd, 1); + + /* clear the freeze, and be sure chip saw it */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + + /* force in-memory update now we are out of freeze */ + ipath_force_pio_avail_update(dd); + + /* + * force new interrupt if any hwerr, error or interrupt bits are + * still set, and clear "safe" send packet errors related to freeze + * and cancelling sends. Re-enable error interrupts before possible + * force of re-interrupt on pending interrupts. + */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL); + ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, + E_SPKT_ERRS_IGNORE); + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, + dd->ipath_errormask); + ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL); + ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); +} + + +/* this is separate to allow for better optimization of ipath_intr() */ + +static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp) +{ + /* + * sometimes happen during driver init and unload, don't want + * to process any interrupts at that point + */ + + /* this is just a bandaid, not a fix, if something goes badly + * wrong */ + if (++*unexpectp > 100) { + if (++*unexpectp > 105) { + /* + * ok, we must be taking somebody else's interrupts, + * due to a messed up mptable and/or PIRQ table, so + * unregister the interrupt. We've seen this during + * linuxbios development work, and it may happen in + * the future again. + */ + if (dd->pcidev && dd->ipath_irq) { + ipath_dev_err(dd, "Now %u unexpected " + "interrupts, unregistering " + "interrupt handler\n", + *unexpectp); + ipath_dbg("free_irq of irq %d\n", + dd->ipath_irq); + dd->ipath_f_free_irq(dd); + } + } + if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) { + ipath_dev_err(dd, "%u unexpected interrupts, " + "disabling interrupts completely\n", + *unexpectp); + /* + * disable all interrupts, something is very wrong + */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, + 0ULL); + } + } else if (*unexpectp > 1) + ipath_dbg("Interrupt when not ready, should not happen, " + "ignoring\n"); +} + +static noinline void ipath_bad_regread(struct ipath_devdata *dd) +{ + static int allbits; + + /* separate routine, for better optimization of ipath_intr() */ + + /* + * We print the message and disable interrupts, in hope of + * having a better chance of debugging the problem. + */ + ipath_dev_err(dd, + "Read of interrupt status failed (all bits set)\n"); + if (allbits++) { + /* disable all interrupts, something is very wrong */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); + if (allbits == 2) { + ipath_dev_err(dd, "Still bad interrupt status, " + "unregistering interrupt\n"); + dd->ipath_f_free_irq(dd); + } else if (allbits > 2) { + if ((allbits % 10000) == 0) + printk("."); + } else + ipath_dev_err(dd, "Disabling interrupts, " + "multiple errors\n"); + } +} + +static void handle_layer_pioavail(struct ipath_devdata *dd) +{ + unsigned long flags; + int ret; + + ret = ipath_ib_piobufavail(dd->verbs_dev); + if (ret > 0) + goto set; + + return; +set: + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); +} + +/* + * Handle receive interrupts for user ports; this means a user + * process was waiting for a packet to arrive, and didn't want + * to poll + */ +static void handle_urcv(struct ipath_devdata *dd, u64 istat) +{ + u64 portr; + int i; + int rcvdint = 0; + + /* + * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and + * test_and_clear_bit(IPATH_PORT_WAITING_URG) below + * would both like timely updates of the bits so that + * we don't pass them by unnecessarily. the rmb() + * here ensures that we see them promptly -- the + * corresponding wmb()'s are in ipath_poll_urgent() + * and ipath_poll_next()... + */ + rmb(); + portr = ((istat >> dd->ipath_i_rcvavail_shift) & + dd->ipath_i_rcvavail_mask) | + ((istat >> dd->ipath_i_rcvurg_shift) & + dd->ipath_i_rcvurg_mask); + for (i = 1; i < dd->ipath_cfgports; i++) { + struct ipath_portdata *pd = dd->ipath_pd[i]; + + if (portr & (1 << i) && pd && pd->port_cnt) { + if (test_and_clear_bit(IPATH_PORT_WAITING_RCV, + &pd->port_flag)) { + clear_bit(i + dd->ipath_r_intravail_shift, + &dd->ipath_rcvctrl); + wake_up_interruptible(&pd->port_wait); + rcvdint = 1; + } else if (test_and_clear_bit(IPATH_PORT_WAITING_URG, + &pd->port_flag)) { + pd->port_urgent++; + wake_up_interruptible(&pd->port_wait); + } + } + } + if (rcvdint) { + /* only want to take one interrupt, so turn off the rcv + * interrupt for all the ports that we set the rcv_waiting + * (but never for kernel port) + */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl); + } +} + +irqreturn_t ipath_intr(int irq, void *data) +{ + struct ipath_devdata *dd = data; + u64 istat, chk0rcv = 0; + ipath_err_t estat = 0; + irqreturn_t ret; + static unsigned unexpected = 0; + u64 kportrbits; + + ipath_stats.sps_ints++; + + if (dd->ipath_int_counter != (u32) -1) + dd->ipath_int_counter++; + + if (!(dd->ipath_flags & IPATH_PRESENT)) { + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + return IRQ_HANDLED; + } + + /* + * this needs to be flags&initted, not statusp, so we keep + * taking interrupts even after link goes down, etc. + * Also, we *must* clear the interrupt at some point, or we won't + * take it again, which can be real bad for errors, etc... + */ + + if (!(dd->ipath_flags & IPATH_INITTED)) { + ipath_bad_intr(dd, &unexpected); + ret = IRQ_NONE; + goto bail; + } + + istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus); + + if (unlikely(!istat)) { + ipath_stats.sps_nullintr++; + ret = IRQ_NONE; /* not our interrupt, or already handled */ + goto bail; + } + if (unlikely(istat == -1)) { + ipath_bad_regread(dd); + /* don't know if it was our interrupt or not */ + ret = IRQ_NONE; + goto bail; + } + + if (unexpected) + unexpected = 0; + + if (unlikely(istat & ~dd->ipath_i_bitsextant)) + ipath_dev_err(dd, + "interrupt with unknown interrupts %Lx set\n", + (unsigned long long) + istat & ~dd->ipath_i_bitsextant); + else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */ + ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", + (unsigned long long) istat); + + if (istat & INFINIPATH_I_ERROR) { + ipath_stats.sps_errints++; + estat = ipath_read_kreg64(dd, + dd->ipath_kregs->kr_errorstatus); + if (!estat) + dev_info(&dd->pcidev->dev, "error interrupt (%Lx), " + "but no error bits set!\n", + (unsigned long long) istat); + else if (estat == -1LL) + /* + * should we try clearing all, or hope next read + * works? + */ + ipath_dev_err(dd, "Read of error status failed " + "(all bits set); ignoring\n"); + else + chk0rcv |= handle_errors(dd, estat); + } + + if (istat & INFINIPATH_I_GPIO) { + /* + * GPIO interrupts fall in two broad classes: + * GPIO_2 indicates (on some HT4xx boards) that a packet + * has arrived for Port 0. Checking for this + * is controlled by flag IPATH_GPIO_INTR. + * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate + * errors that we need to count. Checking for this + * is controlled by flag IPATH_GPIO_ERRINTRS. + */ + u32 gpiostatus; + u32 to_clear = 0; + + gpiostatus = ipath_read_kreg32( + dd, dd->ipath_kregs->kr_gpio_status); + /* First the error-counter case. */ + if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) && + (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) { + /* want to clear the bits we see asserted. */ + to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK); + + /* + * Count appropriately, clear bits out of our copy, + * as they have been "handled". + */ + if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) { + ipath_dbg("FlowCtl on UnsupVL\n"); + dd->ipath_rxfc_unsupvl_errs++; + } + if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) { + ipath_dbg("Overrun Threshold exceeded\n"); + dd->ipath_overrun_thresh_errs++; + } + if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) { + ipath_dbg("Local Link Integrity error\n"); + dd->ipath_lli_errs++; + } + gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK; + } + /* Now the Port0 Receive case */ + if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) && + (dd->ipath_flags & IPATH_GPIO_INTR)) { + /* + * GPIO status bit 2 is set, and we expected it. + * clear it and indicate in p0bits. + * This probably only happens if a Port0 pkt + * arrives at _just_ the wrong time, and we + * handle that by seting chk0rcv; + */ + to_clear |= (1 << IPATH_GPIO_PORT0_BIT); + gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT); + chk0rcv = 1; + } + if (gpiostatus) { + /* + * Some unexpected bits remain. If they could have + * caused the interrupt, complain and clear. + * To avoid repetition of this condition, also clear + * the mask. It is almost certainly due to error. + */ + const u32 mask = (u32) dd->ipath_gpio_mask; + + if (mask & gpiostatus) { + ipath_dbg("Unexpected GPIO IRQ bits %x\n", + gpiostatus & mask); + to_clear |= (gpiostatus & mask); + dd->ipath_gpio_mask &= ~(gpiostatus & mask); + ipath_write_kreg(dd, + dd->ipath_kregs->kr_gpio_mask, + dd->ipath_gpio_mask); + } + } + if (to_clear) { + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, + (u64) to_clear); + } + } + + /* + * Clear the interrupt bits we found set, unless they are receive + * related, in which case we already cleared them above, and don't + * want to clear them again, because we might lose an interrupt. + * Clear it early, so we "know" know the chip will have seen this by + * the time we process the queue, and will re-interrupt if necessary. + * The processor itself won't take the interrupt again until we return. + */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat); + + /* + * Handle kernel receive queues before checking for pio buffers + * available since receives can overflow; piobuf waiters can afford + * a few extra cycles, since they were waiting anyway, and user's + * waiting for receive are at the bottom. + */ + kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) | + (1ULL << dd->ipath_i_rcvurg_shift); + if (chk0rcv || (istat & kportrbits)) { + istat &= ~kportrbits; + ipath_kreceive(dd->ipath_pd[0]); + } + + if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) | + (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift))) + handle_urcv(dd, istat); + + if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED)) + handle_sdma_intr(dd, istat); + + if (istat & INFINIPATH_I_SPIOBUFAVAIL) { + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + + /* always process; sdma verbs uses PIO for acks and VL15 */ + handle_layer_pioavail(dd); + } + + ret = IRQ_HANDLED; + +bail: + return ret; +} diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h new file mode 100644 index 00000000..6559af60 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -0,0 +1,1378 @@ +#ifndef _IPATH_KERNEL_H +#define _IPATH_KERNEL_H +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This header file is the base header file for infinipath kernel code + * ipath_user.h serves a similar purpose for user code. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ipath_common.h" +#include "ipath_debug.h" +#include "ipath_registers.h" + +/* only s/w major version of InfiniPath we can handle */ +#define IPATH_CHIP_VERS_MAJ 2U + +/* don't care about this except printing */ +#define IPATH_CHIP_VERS_MIN 0U + +/* temporary, maybe always */ +extern struct infinipath_stats ipath_stats; + +#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ +/* + * First-cut critierion for "device is active" is + * two thousand dwords combined Tx, Rx traffic per + * 5-second interval. SMA packets are 64 dwords, + * and occur "a few per second", presumably each way. + */ +#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000) +/* + * Struct used to indicate which errors are logged in each of the + * error-counters that are logged to EEPROM. A counter is incremented + * _once_ (saturating at 255) for each event with any bits set in + * the error or hwerror register masks below. + */ +#define IPATH_EEP_LOG_CNT (4) +struct ipath_eep_log_mask { + u64 errs_to_log; + u64 hwerrs_to_log; +}; + +struct ipath_portdata { + void **port_rcvegrbuf; + dma_addr_t *port_rcvegrbuf_phys; + /* rcvhdrq base, needs mmap before useful */ + void *port_rcvhdrq; + /* kernel virtual address where hdrqtail is updated */ + void *port_rcvhdrtail_kvaddr; + /* + * temp buffer for expected send setup, allocated at open, instead + * of each setup call + */ + void *port_tid_pg_list; + /* when waiting for rcv or pioavail */ + wait_queue_head_t port_wait; + /* + * rcvegr bufs base, physical, must fit + * in 44 bits so 32 bit programs mmap64 44 bit works) + */ + dma_addr_t port_rcvegr_phys; + /* mmap of hdrq, must fit in 44 bits */ + dma_addr_t port_rcvhdrq_phys; + dma_addr_t port_rcvhdrqtailaddr_phys; + /* + * number of opens (including slave subports) on this instance + * (ignoring forks, dup, etc. for now) + */ + int port_cnt; + /* + * how much space to leave at start of eager TID entries for + * protocol use, on each TID + */ + /* instead of calculating it */ + unsigned port_port; + /* non-zero if port is being shared. */ + u16 port_subport_cnt; + /* non-zero if port is being shared. */ + u16 port_subport_id; + /* number of pio bufs for this port (all procs, if shared) */ + u32 port_piocnt; + /* first pio buffer for this port */ + u32 port_pio_base; + /* chip offset of PIO buffers for this port */ + u32 port_piobufs; + /* how many alloc_pages() chunks in port_rcvegrbuf_pages */ + u32 port_rcvegrbuf_chunks; + /* how many egrbufs per chunk */ + u32 port_rcvegrbufs_perchunk; + /* order for port_rcvegrbuf_pages */ + size_t port_rcvegrbuf_size; + /* rcvhdrq size (for freeing) */ + size_t port_rcvhdrq_size; + /* next expected TID to check when looking for free */ + u32 port_tidcursor; + /* next expected TID to check */ + unsigned long port_flag; + /* what happened */ + unsigned long int_flag; + /* WAIT_RCV that timed out, no interrupt */ + u32 port_rcvwait_to; + /* WAIT_PIO that timed out, no interrupt */ + u32 port_piowait_to; + /* WAIT_RCV already happened, no wait */ + u32 port_rcvnowait; + /* WAIT_PIO already happened, no wait */ + u32 port_pionowait; + /* total number of rcvhdrqfull errors */ + u32 port_hdrqfull; + /* + * Used to suppress multiple instances of same + * port staying stuck at same point. + */ + u32 port_lastrcvhdrqtail; + /* saved total number of rcvhdrqfull errors for poll edge trigger */ + u32 port_hdrqfull_poll; + /* total number of polled urgent packets */ + u32 port_urgent; + /* saved total number of polled urgent packets for poll edge trigger */ + u32 port_urgent_poll; + /* pid of process using this port */ + struct pid *port_pid; + struct pid *port_subpid[INFINIPATH_MAX_SUBPORT]; + /* same size as task_struct .comm[] */ + char port_comm[16]; + /* pkeys set by this use of this port */ + u16 port_pkeys[4]; + /* so file ops can get at unit */ + struct ipath_devdata *port_dd; + /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ + void *subport_uregbase; + /* An array of pages for the eager receive buffers * N */ + void *subport_rcvegrbuf; + /* An array of pages for the eager header queue entries * N */ + void *subport_rcvhdr_base; + /* The version of the library which opened this port */ + u32 userversion; + /* Bitmask of active slaves */ + u32 active_slaves; + /* Type of packets or conditions we want to poll for */ + u16 poll_type; + /* port rcvhdrq head offset */ + u32 port_head; + /* receive packet sequence counter */ + u32 port_seq_cnt; +}; + +struct sk_buff; +struct ipath_sge_state; +struct ipath_verbs_txreq; + +/* + * control information for layered drivers + */ +struct _ipath_layer { + void *l_arg; +}; + +struct ipath_skbinfo { + struct sk_buff *skb; + dma_addr_t phys; +}; + +struct ipath_sdma_txreq { + int flags; + int sg_count; + union { + struct scatterlist *sg; + void *map_addr; + }; + void (*callback)(void *, int); + void *callback_cookie; + int callback_status; + u16 start_idx; /* sdma private */ + u16 next_descq_idx; /* sdma private */ + struct list_head list; /* sdma private */ +}; + +struct ipath_sdma_desc { + __le64 qw[2]; +}; + +#define IPATH_SDMA_TXREQ_F_USELARGEBUF 0x1 +#define IPATH_SDMA_TXREQ_F_HEADTOHOST 0x2 +#define IPATH_SDMA_TXREQ_F_INTREQ 0x4 +#define IPATH_SDMA_TXREQ_F_FREEBUF 0x8 +#define IPATH_SDMA_TXREQ_F_FREEDESC 0x10 +#define IPATH_SDMA_TXREQ_F_VL15 0x20 + +#define IPATH_SDMA_TXREQ_S_OK 0 +#define IPATH_SDMA_TXREQ_S_SENDERROR 1 +#define IPATH_SDMA_TXREQ_S_ABORTED 2 +#define IPATH_SDMA_TXREQ_S_SHUTDOWN 3 + +#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG (1ull << 63) +#define IPATH_SDMA_STATUS_ABORT_IN_PROG (1ull << 62) +#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE (1ull << 61) +#define IPATH_SDMA_STATUS_SCB_EMPTY (1ull << 30) + +/* max dwords in small buffer packet */ +#define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2) + +/* + * Possible IB config parameters for ipath_f_get/set_ib_cfg() + */ +#define IPATH_IB_CFG_LIDLMC 0 /* Get/set LID (LS16b) and Mask (MS16b) */ +#define IPATH_IB_CFG_HRTBT 1 /* Get/set Heartbeat off/enable/auto */ +#define IPATH_IB_HRTBT_ON 3 /* Heartbeat enabled, sent every 100msec */ +#define IPATH_IB_HRTBT_OFF 0 /* Heartbeat off */ +#define IPATH_IB_CFG_LWID_ENB 2 /* Get/set allowed Link-width */ +#define IPATH_IB_CFG_LWID 3 /* Get currently active Link-width */ +#define IPATH_IB_CFG_SPD_ENB 4 /* Get/set allowed Link speeds */ +#define IPATH_IB_CFG_SPD 5 /* Get current Link spd */ +#define IPATH_IB_CFG_RXPOL_ENB 6 /* Get/set Auto-RX-polarity enable */ +#define IPATH_IB_CFG_LREV_ENB 7 /* Get/set Auto-Lane-reversal enable */ +#define IPATH_IB_CFG_LINKLATENCY 8 /* Get Auto-Lane-reversal enable */ + + +struct ipath_devdata { + struct list_head ipath_list; + + struct ipath_kregs const *ipath_kregs; + struct ipath_cregs const *ipath_cregs; + + /* mem-mapped pointer to base of chip regs */ + u64 __iomem *ipath_kregbase; + /* end of mem-mapped chip space; range checking */ + u64 __iomem *ipath_kregend; + /* physical address of chip for io_remap, etc. */ + unsigned long ipath_physaddr; + /* base of memory alloced for ipath_kregbase, for free */ + u64 *ipath_kregalloc; + /* ipath_cfgports pointers */ + struct ipath_portdata **ipath_pd; + /* sk_buffs used by port 0 eager receive queue */ + struct ipath_skbinfo *ipath_port0_skbinfo; + /* kvirt address of 1st 2k pio buffer */ + void __iomem *ipath_pio2kbase; + /* kvirt address of 1st 4k pio buffer */ + void __iomem *ipath_pio4kbase; + /* + * points to area where PIOavail registers will be DMA'ed. + * Has to be on a page of it's own, because the page will be + * mapped into user program space. This copy is *ONLY* ever + * written by DMA, not by the driver! Need a copy per device + * when we get to multiple devices + */ + volatile __le64 *ipath_pioavailregs_dma; + /* physical address where updates occur */ + dma_addr_t ipath_pioavailregs_phys; + struct _ipath_layer ipath_layer; + /* setup intr */ + int (*ipath_f_intrsetup)(struct ipath_devdata *); + /* fallback to alternate interrupt type if possible */ + int (*ipath_f_intr_fallback)(struct ipath_devdata *); + /* setup on-chip bus config */ + int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *); + /* hard reset chip */ + int (*ipath_f_reset)(struct ipath_devdata *); + int (*ipath_f_get_boardname)(struct ipath_devdata *, char *, + size_t); + void (*ipath_f_init_hwerrors)(struct ipath_devdata *); + void (*ipath_f_handle_hwerrors)(struct ipath_devdata *, char *, + size_t); + void (*ipath_f_quiet_serdes)(struct ipath_devdata *); + int (*ipath_f_bringup_serdes)(struct ipath_devdata *); + int (*ipath_f_early_init)(struct ipath_devdata *); + void (*ipath_f_clear_tids)(struct ipath_devdata *, unsigned); + void (*ipath_f_put_tid)(struct ipath_devdata *, u64 __iomem*, + u32, unsigned long); + void (*ipath_f_tidtemplate)(struct ipath_devdata *); + void (*ipath_f_cleanup)(struct ipath_devdata *); + void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64); + /* fill out chip-specific fields */ + int (*ipath_f_get_base_info)(struct ipath_portdata *, void *); + /* free irq */ + void (*ipath_f_free_irq)(struct ipath_devdata *); + struct ipath_message_header *(*ipath_f_get_msgheader) + (struct ipath_devdata *, __le32 *); + void (*ipath_f_config_ports)(struct ipath_devdata *, ushort); + int (*ipath_f_get_ib_cfg)(struct ipath_devdata *, int); + int (*ipath_f_set_ib_cfg)(struct ipath_devdata *, int, u32); + void (*ipath_f_config_jint)(struct ipath_devdata *, u16 , u16); + void (*ipath_f_read_counters)(struct ipath_devdata *, + struct infinipath_counters *); + void (*ipath_f_xgxs_reset)(struct ipath_devdata *); + /* per chip actions needed for IB Link up/down changes */ + int (*ipath_f_ib_updown)(struct ipath_devdata *, int, u64); + + unsigned ipath_lastegr_idx; + struct ipath_ibdev *verbs_dev; + struct timer_list verbs_timer; + /* total dwords sent (summed from counter) */ + u64 ipath_sword; + /* total dwords rcvd (summed from counter) */ + u64 ipath_rword; + /* total packets sent (summed from counter) */ + u64 ipath_spkts; + /* total packets rcvd (summed from counter) */ + u64 ipath_rpkts; + /* ipath_statusp initially points to this. */ + u64 _ipath_status; + /* GUID for this interface, in network order */ + __be64 ipath_guid; + /* + * aggregrate of error bits reported since last cleared, for + * limiting of error reporting + */ + ipath_err_t ipath_lasterror; + /* + * aggregrate of error bits reported since last cleared, for + * limiting of hwerror reporting + */ + ipath_err_t ipath_lasthwerror; + /* errors masked because they occur too fast */ + ipath_err_t ipath_maskederrs; + u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */ + /* these 5 fields are used to establish deltas for IB Symbol + * errors and linkrecovery errors. They can be reported on + * some chips during link negotiation prior to INIT, and with + * DDR when faking DDR negotiations with non-IBTA switches. + * The chip counters are adjusted at driver unload if there is + * a non-zero delta. + */ + u64 ibdeltainprog; + u64 ibsymdelta; + u64 ibsymsnap; + u64 iblnkerrdelta; + u64 iblnkerrsnap; + + /* time in jiffies at which to re-enable maskederrs */ + unsigned long ipath_unmasktime; + /* count of egrfull errors, combined for all ports */ + u64 ipath_last_tidfull; + /* for ipath_qcheck() */ + u64 ipath_lastport0rcv_cnt; + /* template for writing TIDs */ + u64 ipath_tidtemplate; + /* value to write to free TIDs */ + u64 ipath_tidinvalid; + /* IBA6120 rcv interrupt setup */ + u64 ipath_rhdrhead_intr_off; + + /* size of memory at ipath_kregbase */ + u32 ipath_kregsize; + /* number of registers used for pioavail */ + u32 ipath_pioavregs; + /* IPATH_POLL, etc. */ + u32 ipath_flags; + /* ipath_flags driver is waiting for */ + u32 ipath_state_wanted; + /* last buffer for user use, first buf for kernel use is this + * index. */ + u32 ipath_lastport_piobuf; + /* is a stats timer active */ + u32 ipath_stats_timer_active; + /* number of interrupts for this device -- saturates... */ + u32 ipath_int_counter; + /* dwords sent read from counter */ + u32 ipath_lastsword; + /* dwords received read from counter */ + u32 ipath_lastrword; + /* sent packets read from counter */ + u32 ipath_lastspkts; + /* received packets read from counter */ + u32 ipath_lastrpkts; + /* pio bufs allocated per port */ + u32 ipath_pbufsport; + /* if remainder on bufs/port, ports < extrabuf get 1 extra */ + u32 ipath_ports_extrabuf; + u32 ipath_pioupd_thresh; /* update threshold, some chips */ + /* + * number of ports configured as max; zero is set to number chip + * supports, less gives more pio bufs/port, etc. + */ + u32 ipath_cfgports; + /* count of port 0 hdrqfull errors */ + u32 ipath_p0_hdrqfull; + /* port 0 number of receive eager buffers */ + u32 ipath_p0_rcvegrcnt; + + /* + * index of last piobuffer we used. Speeds up searching, by + * starting at this point. Doesn't matter if multiple cpu's use and + * update, last updater is only write that matters. Whenever it + * wraps, we update shadow copies. Need a copy per device when we + * get to multiple devices + */ + u32 ipath_lastpioindex; + u32 ipath_lastpioindexl; + /* max length of freezemsg */ + u32 ipath_freezelen; + /* + * consecutive times we wanted a PIO buffer but were unable to + * get one + */ + u32 ipath_consec_nopiobuf; + /* + * hint that we should update ipath_pioavailshadow before + * looking for a PIO buffer + */ + u32 ipath_upd_pio_shadow; + /* so we can rewrite it after a chip reset */ + u32 ipath_pcibar0; + /* so we can rewrite it after a chip reset */ + u32 ipath_pcibar1; + u32 ipath_x1_fix_tries; + u32 ipath_autoneg_tries; + u32 serdes_first_init_done; + + struct ipath_relock { + atomic_t ipath_relock_timer_active; + struct timer_list ipath_relock_timer; + unsigned int ipath_relock_interval; /* in jiffies */ + } ipath_relock_singleton; + + /* interrupt number */ + int ipath_irq; + /* HT/PCI Vendor ID (here for NodeInfo) */ + u16 ipath_vendorid; + /* HT/PCI Device ID (here for NodeInfo) */ + u16 ipath_deviceid; + /* offset in HT config space of slave/primary interface block */ + u8 ipath_ht_slave_off; + /* for write combining settings */ + unsigned long ipath_wc_cookie; + unsigned long ipath_wc_base; + unsigned long ipath_wc_len; + /* ref count for each pkey */ + atomic_t ipath_pkeyrefs[4]; + /* shadow copy of struct page *'s for exp tid pages */ + struct page **ipath_pageshadow; + /* shadow copy of dma handles for exp tid pages */ + dma_addr_t *ipath_physshadow; + u64 __iomem *ipath_egrtidbase; + /* lock to workaround chip bug 9437 and others */ + spinlock_t ipath_kernel_tid_lock; + spinlock_t ipath_user_tid_lock; + spinlock_t ipath_sendctrl_lock; + /* around ipath_pd and (user ports) port_cnt use (intr vs free) */ + spinlock_t ipath_uctxt_lock; + + /* + * IPATH_STATUS_*, + * this address is mapped readonly into user processes so they can + * get status cheaply, whenever they want. + */ + u64 *ipath_statusp; + /* freeze msg if hw error put chip in freeze */ + char *ipath_freezemsg; + /* pci access data structure */ + struct pci_dev *pcidev; + struct cdev *user_cdev; + struct cdev *diag_cdev; + struct device *user_dev; + struct device *diag_dev; + /* timer used to prevent stats overflow, error throttling, etc. */ + struct timer_list ipath_stats_timer; + /* timer to verify interrupts work, and fallback if possible */ + struct timer_list ipath_intrchk_timer; + void *ipath_dummy_hdrq; /* used after port close */ + dma_addr_t ipath_dummy_hdrq_phys; + + /* SendDMA related entries */ + spinlock_t ipath_sdma_lock; + unsigned long ipath_sdma_status; + unsigned long ipath_sdma_abort_jiffies; + unsigned long ipath_sdma_abort_intr_timeout; + unsigned long ipath_sdma_buf_jiffies; + struct ipath_sdma_desc *ipath_sdma_descq; + u64 ipath_sdma_descq_added; + u64 ipath_sdma_descq_removed; + int ipath_sdma_desc_nreserved; + u16 ipath_sdma_descq_cnt; + u16 ipath_sdma_descq_tail; + u16 ipath_sdma_descq_head; + u16 ipath_sdma_next_intr; + u16 ipath_sdma_reset_wait; + u8 ipath_sdma_generation; + struct tasklet_struct ipath_sdma_abort_task; + struct tasklet_struct ipath_sdma_notify_task; + struct list_head ipath_sdma_activelist; + struct list_head ipath_sdma_notifylist; + atomic_t ipath_sdma_vl15_count; + struct timer_list ipath_sdma_vl15_timer; + + dma_addr_t ipath_sdma_descq_phys; + volatile __le64 *ipath_sdma_head_dma; + dma_addr_t ipath_sdma_head_phys; + + unsigned long ipath_ureg_align; /* user register alignment */ + + struct delayed_work ipath_autoneg_work; + wait_queue_head_t ipath_autoneg_wait; + + /* HoL blocking / user app forward-progress state */ + unsigned ipath_hol_state; + unsigned ipath_hol_next; + struct timer_list ipath_hol_timer; + + /* + * Shadow copies of registers; size indicates read access size. + * Most of them are readonly, but some are write-only register, + * where we manipulate the bits in the shadow copy, and then write + * the shadow copy to infinipath. + * + * We deliberately make most of these 32 bits, since they have + * restricted range. For any that we read, we won't to generate 32 + * bit accesses, since Opteron will generate 2 separate 32 bit HT + * transactions for a 64 bit read, and we want to avoid unnecessary + * HT transactions. + */ + + /* This is the 64 bit group */ + + /* + * shadow of pioavail, check to be sure it's large enough at + * init time. + */ + unsigned long ipath_pioavailshadow[8]; + /* bitmap of send buffers available for the kernel to use with PIO. */ + unsigned long ipath_pioavailkernel[8]; + /* shadow of kr_gpio_out, for rmw ops */ + u64 ipath_gpio_out; + /* shadow the gpio mask register */ + u64 ipath_gpio_mask; + /* shadow the gpio output enable, etc... */ + u64 ipath_extctrl; + /* kr_revision shadow */ + u64 ipath_revision; + /* + * shadow of ibcctrl, for interrupt handling of link changes, + * etc. + */ + u64 ipath_ibcctrl; + /* + * last ibcstatus, to suppress "duplicate" status change messages, + * mostly from 2 to 3 + */ + u64 ipath_lastibcstat; + /* hwerrmask shadow */ + ipath_err_t ipath_hwerrmask; + ipath_err_t ipath_errormask; /* errormask shadow */ + /* interrupt config reg shadow */ + u64 ipath_intconfig; + /* kr_sendpiobufbase value */ + u64 ipath_piobufbase; + /* kr_ibcddrctrl shadow */ + u64 ipath_ibcddrctrl; + + /* these are the "32 bit" regs */ + + /* + * number of GUIDs in the flash for this interface; may need some + * rethinking for setting on other ifaces + */ + u32 ipath_nguid; + /* + * the following two are 32-bit bitmasks, but {test,clear,set}_bit + * all expect bit fields to be "unsigned long" + */ + /* shadow kr_rcvctrl */ + unsigned long ipath_rcvctrl; + /* shadow kr_sendctrl */ + unsigned long ipath_sendctrl; + /* to not count armlaunch after cancel */ + unsigned long ipath_lastcancel; + /* count cases where special trigger was needed (double write) */ + unsigned long ipath_spectriggerhit; + + /* value we put in kr_rcvhdrcnt */ + u32 ipath_rcvhdrcnt; + /* value we put in kr_rcvhdrsize */ + u32 ipath_rcvhdrsize; + /* value we put in kr_rcvhdrentsize */ + u32 ipath_rcvhdrentsize; + /* offset of last entry in rcvhdrq */ + u32 ipath_hdrqlast; + /* kr_portcnt value */ + u32 ipath_portcnt; + /* kr_pagealign value */ + u32 ipath_palign; + /* number of "2KB" PIO buffers */ + u32 ipath_piobcnt2k; + /* size in bytes of "2KB" PIO buffers */ + u32 ipath_piosize2k; + /* number of "4KB" PIO buffers */ + u32 ipath_piobcnt4k; + /* size in bytes of "4KB" PIO buffers */ + u32 ipath_piosize4k; + u32 ipath_pioreserved; /* reserved special-inkernel; */ + /* kr_rcvegrbase value */ + u32 ipath_rcvegrbase; + /* kr_rcvegrcnt value */ + u32 ipath_rcvegrcnt; + /* kr_rcvtidbase value */ + u32 ipath_rcvtidbase; + /* kr_rcvtidcnt value */ + u32 ipath_rcvtidcnt; + /* kr_sendregbase */ + u32 ipath_sregbase; + /* kr_userregbase */ + u32 ipath_uregbase; + /* kr_counterregbase */ + u32 ipath_cregbase; + /* shadow the control register contents */ + u32 ipath_control; + /* PCI revision register (HTC rev on FPGA) */ + u32 ipath_pcirev; + + /* chip address space used by 4k pio buffers */ + u32 ipath_4kalign; + /* The MTU programmed for this unit */ + u32 ipath_ibmtu; + /* + * The max size IB packet, included IB headers that we can send. + * Starts same as ipath_piosize, but is affected when ibmtu is + * changed, or by size of eager buffers + */ + u32 ipath_ibmaxlen; + /* + * ibmaxlen at init time, limited by chip and by receive buffer + * size. Not changed after init. + */ + u32 ipath_init_ibmaxlen; + /* size of each rcvegrbuffer */ + u32 ipath_rcvegrbufsize; + /* localbus width (1, 2,4,8,16,32) from config space */ + u32 ipath_lbus_width; + /* localbus speed (HT: 200,400,800,1000; PCIe 2500) */ + u32 ipath_lbus_speed; + /* + * number of sequential ibcstatus change for polling active/quiet + * (i.e., link not coming up). + */ + u32 ipath_ibpollcnt; + /* low and high portions of MSI capability/vector */ + u32 ipath_msi_lo; + /* saved after PCIe init for restore after reset */ + u32 ipath_msi_hi; + /* MSI data (vector) saved for restore */ + u16 ipath_msi_data; + /* MLID programmed for this instance */ + u16 ipath_mlid; + /* LID programmed for this instance */ + u16 ipath_lid; + /* list of pkeys programmed; 0 if not set */ + u16 ipath_pkeys[4]; + /* + * ASCII serial number, from flash, large enough for original + * all digit strings, and longer QLogic serial number format + */ + u8 ipath_serial[16]; + /* human readable board version */ + u8 ipath_boardversion[96]; + u8 ipath_lbus_info[32]; /* human readable localbus info */ + /* chip major rev, from ipath_revision */ + u8 ipath_majrev; + /* chip minor rev, from ipath_revision */ + u8 ipath_minrev; + /* board rev, from ipath_revision */ + u8 ipath_boardrev; + /* saved for restore after reset */ + u8 ipath_pci_cacheline; + /* LID mask control */ + u8 ipath_lmc; + /* link width supported */ + u8 ipath_link_width_supported; + /* link speed supported */ + u8 ipath_link_speed_supported; + u8 ipath_link_width_enabled; + u8 ipath_link_speed_enabled; + u8 ipath_link_width_active; + u8 ipath_link_speed_active; + /* Rx Polarity inversion (compensate for ~tx on partner) */ + u8 ipath_rx_pol_inv; + + u8 ipath_r_portenable_shift; + u8 ipath_r_intravail_shift; + u8 ipath_r_tailupd_shift; + u8 ipath_r_portcfg_shift; + + /* unit # of this chip, if present */ + int ipath_unit; + + /* local link integrity counter */ + u32 ipath_lli_counter; + /* local link integrity errors */ + u32 ipath_lli_errors; + /* + * Above counts only cases where _successive_ LocalLinkIntegrity + * errors were seen in the receive headers of kern-packets. + * Below are the three (monotonically increasing) counters + * maintained via GPIO interrupts on iba6120-rev2. + */ + u32 ipath_rxfc_unsupvl_errs; + u32 ipath_overrun_thresh_errs; + u32 ipath_lli_errs; + + /* + * Not all devices managed by a driver instance are the same + * type, so these fields must be per-device. + */ + u64 ipath_i_bitsextant; + ipath_err_t ipath_e_bitsextant; + ipath_err_t ipath_hwe_bitsextant; + + /* + * Below should be computable from number of ports, + * since they are never modified. + */ + u64 ipath_i_rcvavail_mask; + u64 ipath_i_rcvurg_mask; + u16 ipath_i_rcvurg_shift; + u16 ipath_i_rcvavail_shift; + + /* + * Register bits for selecting i2c direction and values, used for + * I2C serial flash. + */ + u8 ipath_gpio_sda_num; + u8 ipath_gpio_scl_num; + u8 ipath_i2c_chain_type; + u64 ipath_gpio_sda; + u64 ipath_gpio_scl; + + /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */ + spinlock_t ipath_gpio_lock; + + /* + * IB link and linktraining states and masks that vary per chip in + * some way. Set at init, to avoid each IB status change interrupt + */ + u8 ibcs_ls_shift; + u8 ibcs_lts_mask; + u32 ibcs_mask; + u32 ib_init; + u32 ib_arm; + u32 ib_active; + + u16 ipath_rhf_offset; /* offset of RHF within receive header entry */ + + /* + * shift/mask for linkcmd, linkinitcmd, maxpktlen in ibccontol + * reg. Changes for IBA7220 + */ + u8 ibcc_lic_mask; /* LinkInitCmd */ + u8 ibcc_lc_shift; /* LinkCmd */ + u8 ibcc_mpl_shift; /* Maxpktlen */ + + u8 delay_mult; + + /* used to override LED behavior */ + u8 ipath_led_override; /* Substituted for normal value, if non-zero */ + u16 ipath_led_override_timeoff; /* delta to next timer event */ + u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */ + u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */ + atomic_t ipath_led_override_timer_active; + /* Used to flash LEDs in override mode */ + struct timer_list ipath_led_override_timer; + + /* Support (including locks) for EEPROM logging of errors and time */ + /* control access to actual counters, timer */ + spinlock_t ipath_eep_st_lock; + /* control high-level access to EEPROM */ + struct mutex ipath_eep_lock; + /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */ + uint64_t ipath_traffic_wds; + /* active time is kept in seconds, but logged in hours */ + atomic_t ipath_active_time; + /* Below are nominal shadow of EEPROM, new since last EEPROM update */ + uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT]; + uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT]; + uint16_t ipath_eep_hrs; + /* + * masks for which bits of errs, hwerrs that cause + * each of the counters to increment. + */ + struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT]; + + /* interrupt mitigation reload register info */ + u16 ipath_jint_idle_ticks; /* idle clock ticks */ + u16 ipath_jint_max_packets; /* max packets across all ports */ + + /* + * lock for access to SerDes, and flags to sequence preset + * versus steady-state. 7220-only at the moment. + */ + spinlock_t ipath_sdepb_lock; + u8 ipath_presets_needed; /* Set if presets to be restored next DOWN */ +}; + +/* ipath_hol_state values (stopping/starting user proc, send flushing) */ +#define IPATH_HOL_UP 0 +#define IPATH_HOL_DOWN 1 +/* ipath_hol_next toggle values, used when hol_state IPATH_HOL_DOWN */ +#define IPATH_HOL_DOWNSTOP 0 +#define IPATH_HOL_DOWNCONT 1 + +/* bit positions for sdma_status */ +#define IPATH_SDMA_ABORTING 0 +#define IPATH_SDMA_DISARMED 1 +#define IPATH_SDMA_DISABLED 2 +#define IPATH_SDMA_LAYERBUF 3 +#define IPATH_SDMA_RUNNING 30 +#define IPATH_SDMA_SHUTDOWN 31 + +/* bit combinations that correspond to abort states */ +#define IPATH_SDMA_ABORT_NONE 0 +#define IPATH_SDMA_ABORT_ABORTING (1UL << IPATH_SDMA_ABORTING) +#define IPATH_SDMA_ABORT_DISARMED ((1UL << IPATH_SDMA_ABORTING) | \ + (1UL << IPATH_SDMA_DISARMED)) +#define IPATH_SDMA_ABORT_DISABLED ((1UL << IPATH_SDMA_ABORTING) | \ + (1UL << IPATH_SDMA_DISABLED)) +#define IPATH_SDMA_ABORT_ABORTED ((1UL << IPATH_SDMA_ABORTING) | \ + (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED)) +#define IPATH_SDMA_ABORT_MASK ((1UL<private_data)->pd +#define subport_fp(fp) \ + ((struct ipath_filedata *)(fp)->private_data)->subport +#define tidcursor_fp(fp) \ + ((struct ipath_filedata *)(fp)->private_data)->tidcursor +#define user_sdma_queue_fp(fp) \ + ((struct ipath_filedata *)(fp)->private_data)->pq + +/* + * values for ipath_flags + */ + /* chip can report link latency (IB 1.2) */ +#define IPATH_HAS_LINK_LATENCY 0x1 + /* The chip is up and initted */ +#define IPATH_INITTED 0x2 + /* set if any user code has set kr_rcvhdrsize */ +#define IPATH_RCVHDRSZ_SET 0x4 + /* The chip is present and valid for accesses */ +#define IPATH_PRESENT 0x8 + /* HT link0 is only 8 bits wide, ignore upper byte crc + * errors, etc. */ +#define IPATH_8BIT_IN_HT0 0x10 + /* HT link1 is only 8 bits wide, ignore upper byte crc + * errors, etc. */ +#define IPATH_8BIT_IN_HT1 0x20 + /* The link is down */ +#define IPATH_LINKDOWN 0x40 + /* The link level is up (0x11) */ +#define IPATH_LINKINIT 0x80 + /* The link is in the armed (0x21) state */ +#define IPATH_LINKARMED 0x100 + /* The link is in the active (0x31) state */ +#define IPATH_LINKACTIVE 0x200 + /* link current state is unknown */ +#define IPATH_LINKUNK 0x400 + /* Write combining flush needed for PIO */ +#define IPATH_PIO_FLUSH_WC 0x1000 + /* DMA Receive tail pointer */ +#define IPATH_NODMA_RTAIL 0x2000 + /* no IB cable, or no device on IB cable */ +#define IPATH_NOCABLE 0x4000 + /* Supports port zero per packet receive interrupts via + * GPIO */ +#define IPATH_GPIO_INTR 0x8000 + /* uses the coded 4byte TID, not 8 byte */ +#define IPATH_4BYTE_TID 0x10000 + /* packet/word counters are 32 bit, else those 4 counters + * are 64bit */ +#define IPATH_32BITCOUNTERS 0x20000 + /* Interrupt register is 64 bits */ +#define IPATH_INTREG_64 0x40000 + /* can miss port0 rx interrupts */ +#define IPATH_DISABLED 0x80000 /* administratively disabled */ + /* Use GPIO interrupts for new counters */ +#define IPATH_GPIO_ERRINTRS 0x100000 +#define IPATH_SWAP_PIOBUFS 0x200000 + /* Supports Send DMA */ +#define IPATH_HAS_SEND_DMA 0x400000 + /* Supports Send Count (not just word count) in PBC */ +#define IPATH_HAS_PBC_CNT 0x800000 + /* Suppress heartbeat, even if turning off loopback */ +#define IPATH_NO_HRTBT 0x1000000 +#define IPATH_HAS_THRESH_UPDATE 0x4000000 +#define IPATH_HAS_MULT_IB_SPEED 0x8000000 +#define IPATH_IB_AUTONEG_INPROG 0x10000000 +#define IPATH_IB_AUTONEG_FAILED 0x20000000 + /* Linkdown-disable intentionally, Do not attempt to bring up */ +#define IPATH_IB_LINK_DISABLED 0x40000000 +#define IPATH_IB_FORCE_NOTIFY 0x80000000 /* force notify on next ib change */ + +/* Bits in GPIO for the added interrupts */ +#define IPATH_GPIO_PORT0_BIT 2 +#define IPATH_GPIO_RXUVL_BIT 3 +#define IPATH_GPIO_OVRUN_BIT 4 +#define IPATH_GPIO_LLI_BIT 5 +#define IPATH_GPIO_ERRINTR_MASK 0x38 + +/* portdata flag bit offsets */ + /* waiting for a packet to arrive */ +#define IPATH_PORT_WAITING_RCV 2 + /* master has not finished initializing */ +#define IPATH_PORT_MASTER_UNINIT 4 + /* waiting for an urgent packet to arrive */ +#define IPATH_PORT_WAITING_URG 5 + +/* free up any allocated data at closes */ +void ipath_free_data(struct ipath_portdata *dd); +u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32, u32 *); +void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, + unsigned len, int avail); +void ipath_init_iba6110_funcs(struct ipath_devdata *); +void ipath_get_eeprom_info(struct ipath_devdata *); +int ipath_update_eeprom_log(struct ipath_devdata *dd); +void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); +u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); +void ipath_disarm_senderrbufs(struct ipath_devdata *); +void ipath_force_pio_avail_update(struct ipath_devdata *); +void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev); + +/* + * Set LED override, only the two LSBs have "public" meaning, but + * any non-zero value substitutes them for the Link and LinkTrain + * LED states. + */ +#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */ +#define IPATH_LED_LOG 2 /* Logical (link) YELLOW LED */ +void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val); + +/* send dma routines */ +int setup_sdma(struct ipath_devdata *); +void teardown_sdma(struct ipath_devdata *); +void ipath_restart_sdma(struct ipath_devdata *); +void ipath_sdma_intr(struct ipath_devdata *); +int ipath_sdma_verbs_send(struct ipath_devdata *, struct ipath_sge_state *, + u32, struct ipath_verbs_txreq *); +/* ipath_sdma_lock should be locked before calling this. */ +int ipath_sdma_make_progress(struct ipath_devdata *dd); + +/* must be called under ipath_sdma_lock */ +static inline u16 ipath_sdma_descq_freecnt(const struct ipath_devdata *dd) +{ + return dd->ipath_sdma_descq_cnt - + (dd->ipath_sdma_descq_added - dd->ipath_sdma_descq_removed) - + 1 - dd->ipath_sdma_desc_nreserved; +} + +static inline void ipath_sdma_desc_reserve(struct ipath_devdata *dd, u16 cnt) +{ + dd->ipath_sdma_desc_nreserved += cnt; +} + +static inline void ipath_sdma_desc_unreserve(struct ipath_devdata *dd, u16 cnt) +{ + dd->ipath_sdma_desc_nreserved -= cnt; +} + +/* + * number of words used for protocol header if not set by ipath_userinit(); + */ +#define IPATH_DFLT_RCVHDRSIZE 9 + +int ipath_get_user_pages(unsigned long, size_t, struct page **); +void ipath_release_user_pages(struct page **, size_t); +void ipath_release_user_pages_on_close(struct page **, size_t); +int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int); +int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int); +int ipath_tempsense_read(struct ipath_devdata *, u8 regnum); +int ipath_tempsense_write(struct ipath_devdata *, u8 regnum, u8 data); + +/* these are used for the registers that vary with port */ +void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg, + unsigned, u64); + +/* + * We could have a single register get/put routine, that takes a group type, + * but this is somewhat clearer and cleaner. It also gives us some error + * checking. 64 bit register reads should always work, but are inefficient + * on opteron (the northbridge always generates 2 separate HT 32 bit reads), + * so we use kreg32 wherever possible. User register and counter register + * reads are always 32 bit reads, so only one form of those routines. + */ + +/* + * At the moment, none of the s-registers are writable, so no + * ipath_write_sreg(). + */ + +/** + * ipath_read_ureg32 - read 32-bit virtualized per-port register + * @dd: device + * @regno: register number + * @port: port number + * + * Return the contents of a register that is virtualized to be per port. + * Returns -1 on errors (not distinguishable from valid contents at + * runtime; we may add a separate error variable at some point). + */ +static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd, + ipath_ureg regno, int port) +{ + if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) + return 0; + + return readl(regno + (u64 __iomem *) + (dd->ipath_uregbase + + (char __iomem *)dd->ipath_kregbase + + dd->ipath_ureg_align * port)); +} + +/** + * ipath_write_ureg - write 32-bit virtualized per-port register + * @dd: device + * @regno: register number + * @value: value + * @port: port + * + * Write the contents of a register that is virtualized to be per port. + */ +static inline void ipath_write_ureg(const struct ipath_devdata *dd, + ipath_ureg regno, u64 value, int port) +{ + u64 __iomem *ubase = (u64 __iomem *) + (dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase + + dd->ipath_ureg_align * port); + if (dd->ipath_kregbase) + writeq(value, &ubase[regno]); +} + +static inline u32 ipath_read_kreg32(const struct ipath_devdata *dd, + ipath_kreg regno) +{ + if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) + return -1; + return readl((u32 __iomem *) & dd->ipath_kregbase[regno]); +} + +static inline u64 ipath_read_kreg64(const struct ipath_devdata *dd, + ipath_kreg regno) +{ + if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) + return -1; + + return readq(&dd->ipath_kregbase[regno]); +} + +static inline void ipath_write_kreg(const struct ipath_devdata *dd, + ipath_kreg regno, u64 value) +{ + if (dd->ipath_kregbase) + writeq(value, &dd->ipath_kregbase[regno]); +} + +static inline u64 ipath_read_creg(const struct ipath_devdata *dd, + ipath_sreg regno) +{ + if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) + return 0; + + return readq(regno + (u64 __iomem *) + (dd->ipath_cregbase + + (char __iomem *)dd->ipath_kregbase)); +} + +static inline u32 ipath_read_creg32(const struct ipath_devdata *dd, + ipath_sreg regno) +{ + if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) + return 0; + return readl(regno + (u64 __iomem *) + (dd->ipath_cregbase + + (char __iomem *)dd->ipath_kregbase)); +} + +static inline void ipath_write_creg(const struct ipath_devdata *dd, + ipath_creg regno, u64 value) +{ + if (dd->ipath_kregbase) + writeq(value, regno + (u64 __iomem *) + (dd->ipath_cregbase + + (char __iomem *)dd->ipath_kregbase)); +} + +static inline void ipath_clear_rcvhdrtail(const struct ipath_portdata *pd) +{ + *((u64 *) pd->port_rcvhdrtail_kvaddr) = 0ULL; +} + +static inline u32 ipath_get_rcvhdrtail(const struct ipath_portdata *pd) +{ + return (u32) le64_to_cpu(*((volatile __le64 *) + pd->port_rcvhdrtail_kvaddr)); +} + +static inline u32 ipath_get_hdrqtail(const struct ipath_portdata *pd) +{ + const struct ipath_devdata *dd = pd->port_dd; + u32 hdrqtail; + + if (dd->ipath_flags & IPATH_NODMA_RTAIL) { + __le32 *rhf_addr; + u32 seq; + + rhf_addr = (__le32 *) pd->port_rcvhdrq + + pd->port_head + dd->ipath_rhf_offset; + seq = ipath_hdrget_seq(rhf_addr); + hdrqtail = pd->port_head; + if (seq == pd->port_seq_cnt) + hdrqtail++; + } else + hdrqtail = ipath_get_rcvhdrtail(pd); + + return hdrqtail; +} + +static inline u64 ipath_read_ireg(const struct ipath_devdata *dd, ipath_kreg r) +{ + return (dd->ipath_flags & IPATH_INTREG_64) ? + ipath_read_kreg64(dd, r) : ipath_read_kreg32(dd, r); +} + +/* + * from contents of IBCStatus (or a saved copy), return linkstate + * Report ACTIVE_DEFER as ACTIVE, because we treat them the same + * everywhere, anyway (and should be, for almost all purposes). + */ +static inline u32 ipath_ib_linkstate(struct ipath_devdata *dd, u64 ibcs) +{ + u32 state = (u32)(ibcs >> dd->ibcs_ls_shift) & + INFINIPATH_IBCS_LINKSTATE_MASK; + if (state == INFINIPATH_IBCS_L_STATE_ACT_DEFER) + state = INFINIPATH_IBCS_L_STATE_ACTIVE; + return state; +} + +/* from contents of IBCStatus (or a saved copy), return linktrainingstate */ +static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs) +{ + return (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & + dd->ibcs_lts_mask; +} + +/* + * from contents of IBCStatus (or a saved copy), return logical link state + * combination of link state and linktraining state (down, active, init, + * arm, etc. + */ +static inline u32 ipath_ib_state(struct ipath_devdata *dd, u64 ibcs) +{ + u32 ibs; + ibs = (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & + dd->ibcs_lts_mask; + ibs |= (u32)(ibcs & + (INFINIPATH_IBCS_LINKSTATE_MASK << dd->ibcs_ls_shift)); + return ibs; +} + +/* + * sysfs interface. + */ + +struct device_driver; + +extern const char ib_ipath_version[]; + +extern const struct attribute_group *ipath_driver_attr_groups[]; + +int ipath_device_create_group(struct device *, struct ipath_devdata *); +void ipath_device_remove_group(struct device *, struct ipath_devdata *); +int ipath_expose_reset(struct device *); + +int ipath_init_ipathfs(void); +void ipath_exit_ipathfs(void); +int ipathfs_add_device(struct ipath_devdata *); +int ipathfs_remove_device(struct ipath_devdata *); + +/* + * dma_addr wrappers - all 0's invalid for hw + */ +dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long, + size_t, int); +dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int); +const char *ipath_get_unit_name(int unit); + +/* + * Flush write combining store buffers (if present) and perform a write + * barrier. + */ +#if defined(CONFIG_X86_64) +#define ipath_flush_wc() asm volatile("sfence" ::: "memory") +#else +#define ipath_flush_wc() wmb() +#endif + +extern unsigned ipath_debug; /* debugging bit mask */ +extern unsigned ipath_linkrecovery; +extern unsigned ipath_mtu4096; +extern struct mutex ipath_mutex; + +#define IPATH_DRV_NAME "ib_ipath" +#define IPATH_MAJOR 233 +#define IPATH_USER_MINOR_BASE 0 +#define IPATH_DIAGPKT_MINOR 127 +#define IPATH_DIAG_MINOR_BASE 129 +#define IPATH_NMINORS 255 + +#define ipath_dev_err(dd,fmt,...) \ + do { \ + const struct ipath_devdata *__dd = (dd); \ + if (__dd->pcidev) \ + dev_err(&__dd->pcidev->dev, "%s: " fmt, \ + ipath_get_unit_name(__dd->ipath_unit), \ + ##__VA_ARGS__); \ + else \ + printk(KERN_ERR IPATH_DRV_NAME ": %s: " fmt, \ + ipath_get_unit_name(__dd->ipath_unit), \ + ##__VA_ARGS__); \ + } while (0) + +#if _IPATH_DEBUGGING + +# define __IPATH_DBG_WHICH(which,fmt,...) \ + do { \ + if (unlikely(ipath_debug & (which))) \ + printk(KERN_DEBUG IPATH_DRV_NAME ": %s: " fmt, \ + __func__,##__VA_ARGS__); \ + } while(0) + +# define ipath_dbg(fmt,...) \ + __IPATH_DBG_WHICH(__IPATH_DBG,fmt,##__VA_ARGS__) +# define ipath_cdbg(which,fmt,...) \ + __IPATH_DBG_WHICH(__IPATH_##which##DBG,fmt,##__VA_ARGS__) + +#else /* ! _IPATH_DEBUGGING */ + +# define ipath_dbg(fmt,...) +# define ipath_cdbg(which,fmt,...) + +#endif /* _IPATH_DEBUGGING */ + +/* + * this is used for formatting hw error messages... + */ +struct ipath_hwerror_msgs { + u64 mask; + const char *msg; +}; + +#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b } + +/* in ipath_intr.c... */ +void ipath_format_hwerrors(u64 hwerrs, + const struct ipath_hwerror_msgs *hwerrmsgs, + size_t nhwerrmsgs, + char *msg, size_t lmsg); + +#endif /* _IPATH_KERNEL_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c new file mode 100644 index 00000000..c0e933fe --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "ipath_verbs.h" +#include "ipath_kernel.h" + +/** + * ipath_alloc_lkey - allocate an lkey + * @rkt: lkey table in which to allocate the lkey + * @mr: memory region that this lkey protects + * + * Returns 1 if successful, otherwise returns 0. + */ + +int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr) +{ + unsigned long flags; + u32 r; + u32 n; + int ret; + + spin_lock_irqsave(&rkt->lock, flags); + + /* Find the next available LKEY */ + r = n = rkt->next; + for (;;) { + if (rkt->table[r] == NULL) + break; + r = (r + 1) & (rkt->max - 1); + if (r == n) { + spin_unlock_irqrestore(&rkt->lock, flags); + ipath_dbg("LKEY table full\n"); + ret = 0; + goto bail; + } + } + rkt->next = (r + 1) & (rkt->max - 1); + /* + * Make sure lkey is never zero which is reserved to indicate an + * unrestricted LKEY. + */ + rkt->gen++; + mr->lkey = (r << (32 - ib_ipath_lkey_table_size)) | + ((((1 << (24 - ib_ipath_lkey_table_size)) - 1) & rkt->gen) + << 8); + if (mr->lkey == 0) { + mr->lkey |= 1 << 8; + rkt->gen++; + } + rkt->table[r] = mr; + spin_unlock_irqrestore(&rkt->lock, flags); + + ret = 1; + +bail: + return ret; +} + +/** + * ipath_free_lkey - free an lkey + * @rkt: table from which to free the lkey + * @lkey: lkey id to free + */ +void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey) +{ + unsigned long flags; + u32 r; + + if (lkey == 0) + return; + r = lkey >> (32 - ib_ipath_lkey_table_size); + spin_lock_irqsave(&rkt->lock, flags); + rkt->table[r] = NULL; + spin_unlock_irqrestore(&rkt->lock, flags); +} + +/** + * ipath_lkey_ok - check IB SGE for validity and initialize + * @rkt: table containing lkey to check SGE against + * @isge: outgoing internal SGE + * @sge: SGE to check + * @acc: access flags + * + * Return 1 if valid and successful, otherwise returns 0. + * + * Check the IB SGE for validity and initialize our internal version + * of it. + */ +int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, + struct ib_sge *sge, int acc) +{ + struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; + struct ipath_mregion *mr; + unsigned n, m; + size_t off; + int ret; + + /* + * We use LKEY == zero for kernel virtual addresses + * (see ipath_get_dma_mr and ipath_dma.c). + */ + if (sge->lkey == 0) { + /* always a kernel port, no locking needed */ + struct ipath_pd *pd = to_ipd(qp->ibqp.pd); + + if (pd->user) { + ret = 0; + goto bail; + } + isge->mr = NULL; + isge->vaddr = (void *) sge->addr; + isge->length = sge->length; + isge->sge_length = sge->length; + ret = 1; + goto bail; + } + mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))]; + if (unlikely(mr == NULL || mr->lkey != sge->lkey || + qp->ibqp.pd != mr->pd)) { + ret = 0; + goto bail; + } + + off = sge->addr - mr->user_base; + if (unlikely(sge->addr < mr->user_base || + off + sge->length > mr->length || + (mr->access_flags & acc) != acc)) { + ret = 0; + goto bail; + } + + off += mr->offset; + m = 0; + n = 0; + while (off >= mr->map[m]->segs[n].length) { + off -= mr->map[m]->segs[n].length; + n++; + if (n >= IPATH_SEGSZ) { + m++; + n = 0; + } + } + isge->mr = mr; + isge->vaddr = mr->map[m]->segs[n].vaddr + off; + isge->length = mr->map[m]->segs[n].length - off; + isge->sge_length = sge->length; + isge->m = m; + isge->n = n; + + ret = 1; + +bail: + return ret; +} + +/** + * ipath_rkey_ok - check the IB virtual address, length, and RKEY + * @dev: infiniband device + * @ss: SGE state + * @len: length of data + * @vaddr: virtual address to place data + * @rkey: rkey to check + * @acc: access flags + * + * Return 1 if successful, otherwise 0. + */ +int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, + u32 len, u64 vaddr, u32 rkey, int acc) +{ + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + struct ipath_lkey_table *rkt = &dev->lk_table; + struct ipath_sge *sge = &ss->sge; + struct ipath_mregion *mr; + unsigned n, m; + size_t off; + int ret; + + /* + * We use RKEY == zero for kernel virtual addresses + * (see ipath_get_dma_mr and ipath_dma.c). + */ + if (rkey == 0) { + /* always a kernel port, no locking needed */ + struct ipath_pd *pd = to_ipd(qp->ibqp.pd); + + if (pd->user) { + ret = 0; + goto bail; + } + sge->mr = NULL; + sge->vaddr = (void *) vaddr; + sge->length = len; + sge->sge_length = len; + ss->sg_list = NULL; + ss->num_sge = 1; + ret = 1; + goto bail; + } + + mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))]; + if (unlikely(mr == NULL || mr->lkey != rkey || + qp->ibqp.pd != mr->pd)) { + ret = 0; + goto bail; + } + + off = vaddr - mr->iova; + if (unlikely(vaddr < mr->iova || off + len > mr->length || + (mr->access_flags & acc) == 0)) { + ret = 0; + goto bail; + } + + off += mr->offset; + m = 0; + n = 0; + while (off >= mr->map[m]->segs[n].length) { + off -= mr->map[m]->segs[n].length; + n++; + if (n >= IPATH_SEGSZ) { + m++; + n = 0; + } + } + sge->mr = mr; + sge->vaddr = mr->map[m]->segs[n].vaddr + off; + sge->length = mr->map[m]->segs[n].length - off; + sge->sge_length = len; + sge->m = m; + sge->n = n; + ss->sg_list = NULL; + ss->num_sge = 1; + + ret = 1; + +bail: + return ret; +} diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c new file mode 100644 index 00000000..43f2d042 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -0,0 +1,1507 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "ipath_kernel.h" +#include "ipath_verbs.h" +#include "ipath_common.h" + +#define IB_SMP_UNSUP_VERSION cpu_to_be16(0x0004) +#define IB_SMP_UNSUP_METHOD cpu_to_be16(0x0008) +#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C) +#define IB_SMP_INVALID_FIELD cpu_to_be16(0x001C) + +static int reply(struct ib_smp *smp) +{ + /* + * The verbs framework will handle the directed/LID route + * packet changes. + */ + smp->method = IB_MGMT_METHOD_GET_RESP; + if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + smp->status |= IB_SMP_DIRECTION; + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +static int recv_subn_get_nodedescription(struct ib_smp *smp, + struct ib_device *ibdev) +{ + if (smp->attr_mod) + smp->status |= IB_SMP_INVALID_FIELD; + + memcpy(smp->data, ibdev->node_desc, sizeof(smp->data)); + + return reply(smp); +} + +struct nodeinfo { + u8 base_version; + u8 class_version; + u8 node_type; + u8 num_ports; + __be64 sys_guid; + __be64 node_guid; + __be64 port_guid; + __be16 partition_cap; + __be16 device_id; + __be32 revision; + u8 local_port_num; + u8 vendor_id[3]; +} __attribute__ ((packed)); + +static int recv_subn_get_nodeinfo(struct ib_smp *smp, + struct ib_device *ibdev, u8 port) +{ + struct nodeinfo *nip = (struct nodeinfo *)&smp->data; + struct ipath_devdata *dd = to_idev(ibdev)->dd; + u32 vendor, majrev, minrev; + + /* GUID 0 is illegal */ + if (smp->attr_mod || (dd->ipath_guid == 0)) + smp->status |= IB_SMP_INVALID_FIELD; + + nip->base_version = 1; + nip->class_version = 1; + nip->node_type = 1; /* channel adapter */ + /* + * XXX The num_ports value will need a layer function to get + * the value if we ever have more than one IB port on a chip. + * We will also need to get the GUID for the port. + */ + nip->num_ports = ibdev->phys_port_cnt; + /* This is already in network order */ + nip->sys_guid = to_idev(ibdev)->sys_image_guid; + nip->node_guid = dd->ipath_guid; + nip->port_guid = dd->ipath_guid; + nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd)); + nip->device_id = cpu_to_be16(dd->ipath_deviceid); + majrev = dd->ipath_majrev; + minrev = dd->ipath_minrev; + nip->revision = cpu_to_be32((majrev << 16) | minrev); + nip->local_port_num = port; + vendor = dd->ipath_vendorid; + nip->vendor_id[0] = IPATH_SRC_OUI_1; + nip->vendor_id[1] = IPATH_SRC_OUI_2; + nip->vendor_id[2] = IPATH_SRC_OUI_3; + + return reply(smp); +} + +static int recv_subn_get_guidinfo(struct ib_smp *smp, + struct ib_device *ibdev) +{ + u32 startgx = 8 * be32_to_cpu(smp->attr_mod); + __be64 *p = (__be64 *) smp->data; + + /* 32 blocks of 8 64-bit GUIDs per block */ + + memset(smp->data, 0, sizeof(smp->data)); + + /* + * We only support one GUID for now. If this changes, the + * portinfo.guid_cap field needs to be updated too. + */ + if (startgx == 0) { + __be64 g = to_idev(ibdev)->dd->ipath_guid; + if (g == 0) + /* GUID 0 is illegal */ + smp->status |= IB_SMP_INVALID_FIELD; + else + /* The first is a copy of the read-only HW GUID. */ + *p = g; + } else + smp->status |= IB_SMP_INVALID_FIELD; + + return reply(smp); +} + +static void set_link_width_enabled(struct ipath_devdata *dd, u32 w) +{ + (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, w); +} + +static void set_link_speed_enabled(struct ipath_devdata *dd, u32 s) +{ + (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, s); +} + +static int get_overrunthreshold(struct ipath_devdata *dd) +{ + return (dd->ipath_ibcctrl >> + INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; +} + +/** + * set_overrunthreshold - set the overrun threshold + * @dd: the infinipath device + * @n: the new threshold + * + * Note that this will only take effect when the link state changes. + */ +static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n) +{ + unsigned v; + + v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; + if (v != n) { + dd->ipath_ibcctrl &= + ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK << + INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT); + dd->ipath_ibcctrl |= + (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + } + return 0; +} + +static int get_phyerrthreshold(struct ipath_devdata *dd) +{ + return (dd->ipath_ibcctrl >> + INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; +} + +/** + * set_phyerrthreshold - set the physical error threshold + * @dd: the infinipath device + * @n: the new threshold + * + * Note that this will only take effect when the link state changes. + */ +static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n) +{ + unsigned v; + + v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; + if (v != n) { + dd->ipath_ibcctrl &= + ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK << + INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT); + dd->ipath_ibcctrl |= + (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + } + return 0; +} + +/** + * get_linkdowndefaultstate - get the default linkdown state + * @dd: the infinipath device + * + * Returns zero if the default is POLL, 1 if the default is SLEEP. + */ +static int get_linkdowndefaultstate(struct ipath_devdata *dd) +{ + return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE); +} + +static int recv_subn_get_portinfo(struct ib_smp *smp, + struct ib_device *ibdev, u8 port) +{ + struct ipath_ibdev *dev; + struct ipath_devdata *dd; + struct ib_port_info *pip = (struct ib_port_info *)smp->data; + u16 lid; + u8 ibcstat; + u8 mtu; + int ret; + + if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) { + smp->status |= IB_SMP_INVALID_FIELD; + ret = reply(smp); + goto bail; + } + + dev = to_idev(ibdev); + dd = dev->dd; + + /* Clear all fields. Only set the non-zero fields. */ + memset(smp->data, 0, sizeof(smp->data)); + + /* Only return the mkey if the protection field allows it. */ + if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey || + dev->mkeyprot == 0) + pip->mkey = dev->mkey; + pip->gid_prefix = dev->gid_prefix; + lid = dd->ipath_lid; + pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE; + pip->sm_lid = cpu_to_be16(dev->sm_lid); + pip->cap_mask = cpu_to_be32(dev->port_cap_flags); + /* pip->diag_code; */ + pip->mkey_lease_period = cpu_to_be16(dev->mkey_lease_period); + pip->local_port_num = port; + pip->link_width_enabled = dd->ipath_link_width_enabled; + pip->link_width_supported = dd->ipath_link_width_supported; + pip->link_width_active = dd->ipath_link_width_active; + pip->linkspeed_portstate = dd->ipath_link_speed_supported << 4; + ibcstat = dd->ipath_lastibcstat; + /* map LinkState to IB portinfo values. */ + pip->linkspeed_portstate |= ipath_ib_linkstate(dd, ibcstat) + 1; + + pip->portphysstate_linkdown = + (ipath_cvt_physportstate[ibcstat & dd->ibcs_lts_mask] << 4) | + (get_linkdowndefaultstate(dd) ? 1 : 2); + pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dd->ipath_lmc; + pip->linkspeedactive_enabled = (dd->ipath_link_speed_active << 4) | + dd->ipath_link_speed_enabled; + switch (dd->ipath_ibmtu) { + case 4096: + mtu = IB_MTU_4096; + break; + case 2048: + mtu = IB_MTU_2048; + break; + case 1024: + mtu = IB_MTU_1024; + break; + case 512: + mtu = IB_MTU_512; + break; + case 256: + mtu = IB_MTU_256; + break; + default: /* oops, something is wrong */ + mtu = IB_MTU_2048; + break; + } + pip->neighbormtu_mastersmsl = (mtu << 4) | dev->sm_sl; + pip->vlcap_inittype = 0x10; /* VLCap = VL0, InitType = 0 */ + pip->vl_high_limit = dev->vl_high_limit; + /* pip->vl_arb_high_cap; // only one VL */ + /* pip->vl_arb_low_cap; // only one VL */ + /* InitTypeReply = 0 */ + /* our mtu cap depends on whether 4K MTU enabled or not */ + pip->inittypereply_mtucap = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048; + /* HCAs ignore VLStallCount and HOQLife */ + /* pip->vlstallcnt_hoqlife; */ + pip->operationalvl_pei_peo_fpi_fpo = 0x10; /* OVLs = 1 */ + pip->mkey_violations = cpu_to_be16(dev->mkey_violations); + /* P_KeyViolations are counted by hardware. */ + pip->pkey_violations = + cpu_to_be16((ipath_get_cr_errpkey(dd) - + dev->z_pkey_violations) & 0xFFFF); + pip->qkey_violations = cpu_to_be16(dev->qkey_violations); + /* Only the hardware GUID is supported for now */ + pip->guid_cap = 1; + pip->clientrereg_resv_subnetto = dev->subnet_timeout; + /* 32.768 usec. response time (guessing) */ + pip->resv_resptimevalue = 3; + pip->localphyerrors_overrunerrors = + (get_phyerrthreshold(dd) << 4) | + get_overrunthreshold(dd); + /* pip->max_credit_hint; */ + if (dev->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) { + u32 v; + + v = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LINKLATENCY); + pip->link_roundtrip_latency[0] = v >> 16; + pip->link_roundtrip_latency[1] = v >> 8; + pip->link_roundtrip_latency[2] = v; + } + + ret = reply(smp); + +bail: + return ret; +} + +/** + * get_pkeys - return the PKEY table for port 0 + * @dd: the infinipath device + * @pkeys: the pkey table is placed here + */ +static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys) +{ + /* always a kernel port, no locking needed */ + struct ipath_portdata *pd = dd->ipath_pd[0]; + + memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys)); + + return 0; +} + +static int recv_subn_get_pkeytable(struct ib_smp *smp, + struct ib_device *ibdev) +{ + u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff); + u16 *p = (u16 *) smp->data; + __be16 *q = (__be16 *) smp->data; + + /* 64 blocks of 32 16-bit P_Key entries */ + + memset(smp->data, 0, sizeof(smp->data)); + if (startpx == 0) { + struct ipath_ibdev *dev = to_idev(ibdev); + unsigned i, n = ipath_get_npkeys(dev->dd); + + get_pkeys(dev->dd, p); + + for (i = 0; i < n; i++) + q[i] = cpu_to_be16(p[i]); + } else + smp->status |= IB_SMP_INVALID_FIELD; + + return reply(smp); +} + +static int recv_subn_set_guidinfo(struct ib_smp *smp, + struct ib_device *ibdev) +{ + /* The only GUID we support is the first read-only entry. */ + return recv_subn_get_guidinfo(smp, ibdev); +} + +/** + * set_linkdowndefaultstate - set the default linkdown state + * @dd: the infinipath device + * @sleep: the new state + * + * Note that this will only take effect when the link state changes. + */ +static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep) +{ + if (sleep) + dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; + else + dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + return 0; +} + +/** + * recv_subn_set_portinfo - set port information + * @smp: the incoming SM packet + * @ibdev: the infiniband device + * @port: the port on the device + * + * Set Portinfo (see ch. 14.2.5.6). + */ +static int recv_subn_set_portinfo(struct ib_smp *smp, + struct ib_device *ibdev, u8 port) +{ + struct ib_port_info *pip = (struct ib_port_info *)smp->data; + struct ib_event event; + struct ipath_ibdev *dev; + struct ipath_devdata *dd; + char clientrereg = 0; + u16 lid, smlid; + u8 lwe; + u8 lse; + u8 state; + u16 lstate; + u32 mtu; + int ret, ore; + + if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) + goto err; + + dev = to_idev(ibdev); + dd = dev->dd; + event.device = ibdev; + event.element.port_num = port; + + dev->mkey = pip->mkey; + dev->gid_prefix = pip->gid_prefix; + dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period); + + lid = be16_to_cpu(pip->lid); + if (dd->ipath_lid != lid || + dd->ipath_lmc != (pip->mkeyprot_resv_lmc & 7)) { + /* Must be a valid unicast LID address. */ + if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) + goto err; + ipath_set_lid(dd, lid, pip->mkeyprot_resv_lmc & 7); + event.event = IB_EVENT_LID_CHANGE; + ib_dispatch_event(&event); + } + + smlid = be16_to_cpu(pip->sm_lid); + if (smlid != dev->sm_lid) { + /* Must be a valid unicast LID address. */ + if (smlid == 0 || smlid >= IPATH_MULTICAST_LID_BASE) + goto err; + dev->sm_lid = smlid; + event.event = IB_EVENT_SM_CHANGE; + ib_dispatch_event(&event); + } + + /* Allow 1x or 4x to be set (see 14.2.6.6). */ + lwe = pip->link_width_enabled; + if (lwe) { + if (lwe == 0xFF) + lwe = dd->ipath_link_width_supported; + else if (lwe >= 16 || (lwe & ~dd->ipath_link_width_supported)) + goto err; + set_link_width_enabled(dd, lwe); + } + + /* Allow 2.5 or 5.0 Gbs. */ + lse = pip->linkspeedactive_enabled & 0xF; + if (lse) { + if (lse == 15) + lse = dd->ipath_link_speed_supported; + else if (lse >= 8 || (lse & ~dd->ipath_link_speed_supported)) + goto err; + set_link_speed_enabled(dd, lse); + } + + /* Set link down default state. */ + switch (pip->portphysstate_linkdown & 0xF) { + case 0: /* NOP */ + break; + case 1: /* SLEEP */ + if (set_linkdowndefaultstate(dd, 1)) + goto err; + break; + case 2: /* POLL */ + if (set_linkdowndefaultstate(dd, 0)) + goto err; + break; + default: + goto err; + } + + dev->mkeyprot = pip->mkeyprot_resv_lmc >> 6; + dev->vl_high_limit = pip->vl_high_limit; + + switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) { + case IB_MTU_256: + mtu = 256; + break; + case IB_MTU_512: + mtu = 512; + break; + case IB_MTU_1024: + mtu = 1024; + break; + case IB_MTU_2048: + mtu = 2048; + break; + case IB_MTU_4096: + if (!ipath_mtu4096) + goto err; + mtu = 4096; + break; + default: + /* XXX We have already partially updated our state! */ + goto err; + } + ipath_set_mtu(dd, mtu); + + dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF; + + /* We only support VL0 */ + if (((pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF) > 1) + goto err; + + if (pip->mkey_violations == 0) + dev->mkey_violations = 0; + + /* + * Hardware counter can't be reset so snapshot and subtract + * later. + */ + if (pip->pkey_violations == 0) + dev->z_pkey_violations = ipath_get_cr_errpkey(dd); + + if (pip->qkey_violations == 0) + dev->qkey_violations = 0; + + ore = pip->localphyerrors_overrunerrors; + if (set_phyerrthreshold(dd, (ore >> 4) & 0xF)) + goto err; + + if (set_overrunthreshold(dd, (ore & 0xF))) + goto err; + + dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; + + if (pip->clientrereg_resv_subnetto & 0x80) { + clientrereg = 1; + event.event = IB_EVENT_CLIENT_REREGISTER; + ib_dispatch_event(&event); + } + + /* + * Do the port state change now that the other link parameters + * have been set. + * Changing the port physical state only makes sense if the link + * is down or is being set to down. + */ + state = pip->linkspeed_portstate & 0xF; + lstate = (pip->portphysstate_linkdown >> 4) & 0xF; + if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP)) + goto err; + + /* + * Only state changes of DOWN, ARM, and ACTIVE are valid + * and must be in the correct state to take effect (see 7.2.6). + */ + switch (state) { + case IB_PORT_NOP: + if (lstate == 0) + break; + /* FALLTHROUGH */ + case IB_PORT_DOWN: + if (lstate == 0) + lstate = IPATH_IB_LINKDOWN_ONLY; + else if (lstate == 1) + lstate = IPATH_IB_LINKDOWN_SLEEP; + else if (lstate == 2) + lstate = IPATH_IB_LINKDOWN; + else if (lstate == 3) + lstate = IPATH_IB_LINKDOWN_DISABLE; + else + goto err; + ipath_set_linkstate(dd, lstate); + if (lstate == IPATH_IB_LINKDOWN_DISABLE) { + ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + goto done; + } + ipath_wait_linkstate(dd, IPATH_LINKINIT | IPATH_LINKARMED | + IPATH_LINKACTIVE, 1000); + break; + case IB_PORT_ARMED: + ipath_set_linkstate(dd, IPATH_IB_LINKARM); + break; + case IB_PORT_ACTIVE: + ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE); + break; + default: + /* XXX We have already partially updated our state! */ + goto err; + } + + ret = recv_subn_get_portinfo(smp, ibdev, port); + + if (clientrereg) + pip->clientrereg_resv_subnetto |= 0x80; + + goto done; + +err: + smp->status |= IB_SMP_INVALID_FIELD; + ret = recv_subn_get_portinfo(smp, ibdev, port); + +done: + return ret; +} + +/** + * rm_pkey - decrecment the reference count for the given PKEY + * @dd: the infinipath device + * @key: the PKEY index + * + * Return true if this was the last reference and the hardware table entry + * needs to be changed. + */ +static int rm_pkey(struct ipath_devdata *dd, u16 key) +{ + int i; + int ret; + + for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { + if (dd->ipath_pkeys[i] != key) + continue; + if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) { + dd->ipath_pkeys[i] = 0; + ret = 1; + goto bail; + } + break; + } + + ret = 0; + +bail: + return ret; +} + +/** + * add_pkey - add the given PKEY to the hardware table + * @dd: the infinipath device + * @key: the PKEY + * + * Return an error code if unable to add the entry, zero if no change, + * or 1 if the hardware PKEY register needs to be updated. + */ +static int add_pkey(struct ipath_devdata *dd, u16 key) +{ + int i; + u16 lkey = key & 0x7FFF; + int any = 0; + int ret; + + if (lkey == 0x7FFF) { + ret = 0; + goto bail; + } + + /* Look for an empty slot or a matching PKEY. */ + for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { + if (!dd->ipath_pkeys[i]) { + any++; + continue; + } + /* If it matches exactly, try to increment the ref count */ + if (dd->ipath_pkeys[i] == key) { + if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) { + ret = 0; + goto bail; + } + /* Lost the race. Look for an empty slot below. */ + atomic_dec(&dd->ipath_pkeyrefs[i]); + any++; + } + /* + * It makes no sense to have both the limited and unlimited + * PKEY set at the same time since the unlimited one will + * disable the limited one. + */ + if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) { + ret = -EEXIST; + goto bail; + } + } + if (!any) { + ret = -EBUSY; + goto bail; + } + for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { + if (!dd->ipath_pkeys[i] && + atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) { + /* for ipathstats, etc. */ + ipath_stats.sps_pkeys[i] = lkey; + dd->ipath_pkeys[i] = key; + ret = 1; + goto bail; + } + } + ret = -EBUSY; + +bail: + return ret; +} + +/** + * set_pkeys - set the PKEY table for port 0 + * @dd: the infinipath device + * @pkeys: the PKEY table + */ +static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) +{ + struct ipath_portdata *pd; + int i; + int changed = 0; + + /* always a kernel port, no locking needed */ + pd = dd->ipath_pd[0]; + + for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { + u16 key = pkeys[i]; + u16 okey = pd->port_pkeys[i]; + + if (key == okey) + continue; + /* + * The value of this PKEY table entry is changing. + * Remove the old entry in the hardware's array of PKEYs. + */ + if (okey & 0x7FFF) + changed |= rm_pkey(dd, okey); + if (key & 0x7FFF) { + int ret = add_pkey(dd, key); + + if (ret < 0) + key = 0; + else + changed |= ret; + } + pd->port_pkeys[i] = key; + } + if (changed) { + u64 pkey; + + pkey = (u64) dd->ipath_pkeys[0] | + ((u64) dd->ipath_pkeys[1] << 16) | + ((u64) dd->ipath_pkeys[2] << 32) | + ((u64) dd->ipath_pkeys[3] << 48); + ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n", + (unsigned long long) pkey); + ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, + pkey); + } + return 0; +} + +static int recv_subn_set_pkeytable(struct ib_smp *smp, + struct ib_device *ibdev) +{ + u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff); + __be16 *p = (__be16 *) smp->data; + u16 *q = (u16 *) smp->data; + struct ipath_ibdev *dev = to_idev(ibdev); + unsigned i, n = ipath_get_npkeys(dev->dd); + + for (i = 0; i < n; i++) + q[i] = be16_to_cpu(p[i]); + + if (startpx != 0 || set_pkeys(dev->dd, q) != 0) + smp->status |= IB_SMP_INVALID_FIELD; + + return recv_subn_get_pkeytable(smp, ibdev); +} + +static int recv_pma_get_classportinfo(struct ib_pma_mad *pmp) +{ + struct ib_class_port_info *p = + (struct ib_class_port_info *)pmp->data; + + memset(pmp->data, 0, sizeof(pmp->data)); + + if (pmp->mad_hdr.attr_mod != 0) + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + + /* Indicate AllPortSelect is valid (only one port anyway) */ + p->capability_mask = cpu_to_be16(1 << 8); + p->base_version = 1; + p->class_version = 1; + /* + * Expected response time is 4.096 usec. * 2^18 == 1.073741824 + * sec. + */ + p->resp_time_value = 18; + + return reply((struct ib_smp *) pmp); +} + +/* + * The PortSamplesControl.CounterMasks field is an array of 3 bit fields + * which specify the N'th counter's capabilities. See ch. 16.1.3.2. + * We support 5 counters which only count the mandatory quantities. + */ +#define COUNTER_MASK(q, n) (q << ((9 - n) * 3)) +#define COUNTER_MASK0_9 cpu_to_be32(COUNTER_MASK(1, 0) | \ + COUNTER_MASK(1, 1) | \ + COUNTER_MASK(1, 2) | \ + COUNTER_MASK(1, 3) | \ + COUNTER_MASK(1, 4)) + +static int recv_pma_get_portsamplescontrol(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portsamplescontrol *p = + (struct ib_pma_portsamplescontrol *)pmp->data; + struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_cregs const *crp = dev->dd->ipath_cregs; + unsigned long flags; + u8 port_select = p->port_select; + + memset(pmp->data, 0, sizeof(pmp->data)); + + p->port_select = port_select; + if (pmp->mad_hdr.attr_mod != 0 || + (port_select != port && port_select != 0xFF)) + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + /* + * Ticks are 10x the link transfer period which for 2.5Gbs is 4 + * nsec. 0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1020 nsec. Sample + * intervals are counted in ticks. Since we use Linux timers, that + * count in jiffies, we can't sample for less than 1000 ticks if HZ + * == 1000 (4000 ticks if HZ is 250). link_speed_active returns 2 for + * DDR, 1 for SDR, set the tick to 1 for DDR, 0 for SDR on chips that + * have hardware support for delaying packets. + */ + if (crp->cr_psstat) + p->tick = dev->dd->ipath_link_speed_active - 1; + else + p->tick = 250; /* 1 usec. */ + p->counter_width = 4; /* 32 bit counters */ + p->counter_mask0_9 = COUNTER_MASK0_9; + spin_lock_irqsave(&dev->pending_lock, flags); + if (crp->cr_psstat) + p->sample_status = ipath_read_creg32(dev->dd, crp->cr_psstat); + else + p->sample_status = dev->pma_sample_status; + p->sample_start = cpu_to_be32(dev->pma_sample_start); + p->sample_interval = cpu_to_be32(dev->pma_sample_interval); + p->tag = cpu_to_be16(dev->pma_tag); + p->counter_select[0] = dev->pma_counter_select[0]; + p->counter_select[1] = dev->pma_counter_select[1]; + p->counter_select[2] = dev->pma_counter_select[2]; + p->counter_select[3] = dev->pma_counter_select[3]; + p->counter_select[4] = dev->pma_counter_select[4]; + spin_unlock_irqrestore(&dev->pending_lock, flags); + + return reply((struct ib_smp *) pmp); +} + +static int recv_pma_set_portsamplescontrol(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portsamplescontrol *p = + (struct ib_pma_portsamplescontrol *)pmp->data; + struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_cregs const *crp = dev->dd->ipath_cregs; + unsigned long flags; + u8 status; + int ret; + + if (pmp->mad_hdr.attr_mod != 0 || + (p->port_select != port && p->port_select != 0xFF)) { + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + ret = reply((struct ib_smp *) pmp); + goto bail; + } + + spin_lock_irqsave(&dev->pending_lock, flags); + if (crp->cr_psstat) + status = ipath_read_creg32(dev->dd, crp->cr_psstat); + else + status = dev->pma_sample_status; + if (status == IB_PMA_SAMPLE_STATUS_DONE) { + dev->pma_sample_start = be32_to_cpu(p->sample_start); + dev->pma_sample_interval = be32_to_cpu(p->sample_interval); + dev->pma_tag = be16_to_cpu(p->tag); + dev->pma_counter_select[0] = p->counter_select[0]; + dev->pma_counter_select[1] = p->counter_select[1]; + dev->pma_counter_select[2] = p->counter_select[2]; + dev->pma_counter_select[3] = p->counter_select[3]; + dev->pma_counter_select[4] = p->counter_select[4]; + if (crp->cr_psstat) { + ipath_write_creg(dev->dd, crp->cr_psinterval, + dev->pma_sample_interval); + ipath_write_creg(dev->dd, crp->cr_psstart, + dev->pma_sample_start); + } else + dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED; + } + spin_unlock_irqrestore(&dev->pending_lock, flags); + + ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port); + +bail: + return ret; +} + +static u64 get_counter(struct ipath_ibdev *dev, + struct ipath_cregs const *crp, + __be16 sel) +{ + u64 ret; + + switch (sel) { + case IB_PMA_PORT_XMIT_DATA: + ret = (crp->cr_psxmitdatacount) ? + ipath_read_creg32(dev->dd, crp->cr_psxmitdatacount) : + dev->ipath_sword; + break; + case IB_PMA_PORT_RCV_DATA: + ret = (crp->cr_psrcvdatacount) ? + ipath_read_creg32(dev->dd, crp->cr_psrcvdatacount) : + dev->ipath_rword; + break; + case IB_PMA_PORT_XMIT_PKTS: + ret = (crp->cr_psxmitpktscount) ? + ipath_read_creg32(dev->dd, crp->cr_psxmitpktscount) : + dev->ipath_spkts; + break; + case IB_PMA_PORT_RCV_PKTS: + ret = (crp->cr_psrcvpktscount) ? + ipath_read_creg32(dev->dd, crp->cr_psrcvpktscount) : + dev->ipath_rpkts; + break; + case IB_PMA_PORT_XMIT_WAIT: + ret = (crp->cr_psxmitwaitcount) ? + ipath_read_creg32(dev->dd, crp->cr_psxmitwaitcount) : + dev->ipath_xmit_wait; + break; + default: + ret = 0; + } + + return ret; +} + +static int recv_pma_get_portsamplesresult(struct ib_pma_mad *pmp, + struct ib_device *ibdev) +{ + struct ib_pma_portsamplesresult *p = + (struct ib_pma_portsamplesresult *)pmp->data; + struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_cregs const *crp = dev->dd->ipath_cregs; + u8 status; + int i; + + memset(pmp->data, 0, sizeof(pmp->data)); + p->tag = cpu_to_be16(dev->pma_tag); + if (crp->cr_psstat) + status = ipath_read_creg32(dev->dd, crp->cr_psstat); + else + status = dev->pma_sample_status; + p->sample_status = cpu_to_be16(status); + for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) + p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 : + cpu_to_be32( + get_counter(dev, crp, dev->pma_counter_select[i])); + + return reply((struct ib_smp *) pmp); +} + +static int recv_pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp, + struct ib_device *ibdev) +{ + struct ib_pma_portsamplesresult_ext *p = + (struct ib_pma_portsamplesresult_ext *)pmp->data; + struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_cregs const *crp = dev->dd->ipath_cregs; + u8 status; + int i; + + memset(pmp->data, 0, sizeof(pmp->data)); + p->tag = cpu_to_be16(dev->pma_tag); + if (crp->cr_psstat) + status = ipath_read_creg32(dev->dd, crp->cr_psstat); + else + status = dev->pma_sample_status; + p->sample_status = cpu_to_be16(status); + /* 64 bits */ + p->extended_width = cpu_to_be32(0x80000000); + for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) + p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 : + cpu_to_be64( + get_counter(dev, crp, dev->pma_counter_select[i])); + + return reply((struct ib_smp *) pmp); +} + +static int recv_pma_get_portcounters(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) + pmp->data; + struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_verbs_counters cntrs; + u8 port_select = p->port_select; + + ipath_get_counters(dev->dd, &cntrs); + + /* Adjust counters for any resets done. */ + cntrs.symbol_error_counter -= dev->z_symbol_error_counter; + cntrs.link_error_recovery_counter -= + dev->z_link_error_recovery_counter; + cntrs.link_downed_counter -= dev->z_link_downed_counter; + cntrs.port_rcv_errors += dev->rcv_errors; + cntrs.port_rcv_errors -= dev->z_port_rcv_errors; + cntrs.port_rcv_remphys_errors -= dev->z_port_rcv_remphys_errors; + cntrs.port_xmit_discards -= dev->z_port_xmit_discards; + cntrs.port_xmit_data -= dev->z_port_xmit_data; + cntrs.port_rcv_data -= dev->z_port_rcv_data; + cntrs.port_xmit_packets -= dev->z_port_xmit_packets; + cntrs.port_rcv_packets -= dev->z_port_rcv_packets; + cntrs.local_link_integrity_errors -= + dev->z_local_link_integrity_errors; + cntrs.excessive_buffer_overrun_errors -= + dev->z_excessive_buffer_overrun_errors; + cntrs.vl15_dropped -= dev->z_vl15_dropped; + cntrs.vl15_dropped += dev->n_vl15_dropped; + + memset(pmp->data, 0, sizeof(pmp->data)); + + p->port_select = port_select; + if (pmp->mad_hdr.attr_mod != 0 || + (port_select != port && port_select != 0xFF)) + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + + if (cntrs.symbol_error_counter > 0xFFFFUL) + p->symbol_error_counter = cpu_to_be16(0xFFFF); + else + p->symbol_error_counter = + cpu_to_be16((u16)cntrs.symbol_error_counter); + if (cntrs.link_error_recovery_counter > 0xFFUL) + p->link_error_recovery_counter = 0xFF; + else + p->link_error_recovery_counter = + (u8)cntrs.link_error_recovery_counter; + if (cntrs.link_downed_counter > 0xFFUL) + p->link_downed_counter = 0xFF; + else + p->link_downed_counter = (u8)cntrs.link_downed_counter; + if (cntrs.port_rcv_errors > 0xFFFFUL) + p->port_rcv_errors = cpu_to_be16(0xFFFF); + else + p->port_rcv_errors = + cpu_to_be16((u16) cntrs.port_rcv_errors); + if (cntrs.port_rcv_remphys_errors > 0xFFFFUL) + p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF); + else + p->port_rcv_remphys_errors = + cpu_to_be16((u16)cntrs.port_rcv_remphys_errors); + if (cntrs.port_xmit_discards > 0xFFFFUL) + p->port_xmit_discards = cpu_to_be16(0xFFFF); + else + p->port_xmit_discards = + cpu_to_be16((u16)cntrs.port_xmit_discards); + if (cntrs.local_link_integrity_errors > 0xFUL) + cntrs.local_link_integrity_errors = 0xFUL; + if (cntrs.excessive_buffer_overrun_errors > 0xFUL) + cntrs.excessive_buffer_overrun_errors = 0xFUL; + p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) | + cntrs.excessive_buffer_overrun_errors; + if (cntrs.vl15_dropped > 0xFFFFUL) + p->vl15_dropped = cpu_to_be16(0xFFFF); + else + p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped); + if (cntrs.port_xmit_data > 0xFFFFFFFFUL) + p->port_xmit_data = cpu_to_be32(0xFFFFFFFF); + else + p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data); + if (cntrs.port_rcv_data > 0xFFFFFFFFUL) + p->port_rcv_data = cpu_to_be32(0xFFFFFFFF); + else + p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data); + if (cntrs.port_xmit_packets > 0xFFFFFFFFUL) + p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF); + else + p->port_xmit_packets = + cpu_to_be32((u32)cntrs.port_xmit_packets); + if (cntrs.port_rcv_packets > 0xFFFFFFFFUL) + p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF); + else + p->port_rcv_packets = + cpu_to_be32((u32) cntrs.port_rcv_packets); + + return reply((struct ib_smp *) pmp); +} + +static int recv_pma_get_portcounters_ext(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portcounters_ext *p = + (struct ib_pma_portcounters_ext *)pmp->data; + struct ipath_ibdev *dev = to_idev(ibdev); + u64 swords, rwords, spkts, rpkts, xwait; + u8 port_select = p->port_select; + + ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts, + &rpkts, &xwait); + + /* Adjust counters for any resets done. */ + swords -= dev->z_port_xmit_data; + rwords -= dev->z_port_rcv_data; + spkts -= dev->z_port_xmit_packets; + rpkts -= dev->z_port_rcv_packets; + + memset(pmp->data, 0, sizeof(pmp->data)); + + p->port_select = port_select; + if (pmp->mad_hdr.attr_mod != 0 || + (port_select != port && port_select != 0xFF)) + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + + p->port_xmit_data = cpu_to_be64(swords); + p->port_rcv_data = cpu_to_be64(rwords); + p->port_xmit_packets = cpu_to_be64(spkts); + p->port_rcv_packets = cpu_to_be64(rpkts); + p->port_unicast_xmit_packets = cpu_to_be64(dev->n_unicast_xmit); + p->port_unicast_rcv_packets = cpu_to_be64(dev->n_unicast_rcv); + p->port_multicast_xmit_packets = cpu_to_be64(dev->n_multicast_xmit); + p->port_multicast_rcv_packets = cpu_to_be64(dev->n_multicast_rcv); + + return reply((struct ib_smp *) pmp); +} + +static int recv_pma_set_portcounters(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) + pmp->data; + struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_verbs_counters cntrs; + + /* + * Since the HW doesn't support clearing counters, we save the + * current count and subtract it from future responses. + */ + ipath_get_counters(dev->dd, &cntrs); + + if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR) + dev->z_symbol_error_counter = cntrs.symbol_error_counter; + + if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY) + dev->z_link_error_recovery_counter = + cntrs.link_error_recovery_counter; + + if (p->counter_select & IB_PMA_SEL_LINK_DOWNED) + dev->z_link_downed_counter = cntrs.link_downed_counter; + + if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS) + dev->z_port_rcv_errors = + cntrs.port_rcv_errors + dev->rcv_errors; + + if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS) + dev->z_port_rcv_remphys_errors = + cntrs.port_rcv_remphys_errors; + + if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS) + dev->z_port_xmit_discards = cntrs.port_xmit_discards; + + if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS) + dev->z_local_link_integrity_errors = + cntrs.local_link_integrity_errors; + + if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS) + dev->z_excessive_buffer_overrun_errors = + cntrs.excessive_buffer_overrun_errors; + + if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) { + dev->n_vl15_dropped = 0; + dev->z_vl15_dropped = cntrs.vl15_dropped; + } + + if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA) + dev->z_port_xmit_data = cntrs.port_xmit_data; + + if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA) + dev->z_port_rcv_data = cntrs.port_rcv_data; + + if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS) + dev->z_port_xmit_packets = cntrs.port_xmit_packets; + + if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS) + dev->z_port_rcv_packets = cntrs.port_rcv_packets; + + return recv_pma_get_portcounters(pmp, ibdev, port); +} + +static int recv_pma_set_portcounters_ext(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) + pmp->data; + struct ipath_ibdev *dev = to_idev(ibdev); + u64 swords, rwords, spkts, rpkts, xwait; + + ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts, + &rpkts, &xwait); + + if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA) + dev->z_port_xmit_data = swords; + + if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA) + dev->z_port_rcv_data = rwords; + + if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS) + dev->z_port_xmit_packets = spkts; + + if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS) + dev->z_port_rcv_packets = rpkts; + + if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS) + dev->n_unicast_xmit = 0; + + if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS) + dev->n_unicast_rcv = 0; + + if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS) + dev->n_multicast_xmit = 0; + + if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS) + dev->n_multicast_rcv = 0; + + return recv_pma_get_portcounters_ext(pmp, ibdev, port); +} + +static int process_subn(struct ib_device *ibdev, int mad_flags, + u8 port_num, struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + struct ib_smp *smp = (struct ib_smp *)out_mad; + struct ipath_ibdev *dev = to_idev(ibdev); + int ret; + + *out_mad = *in_mad; + if (smp->class_version != 1) { + smp->status |= IB_SMP_UNSUP_VERSION; + ret = reply(smp); + goto bail; + } + + /* Is the mkey in the process of expiring? */ + if (dev->mkey_lease_timeout && + time_after_eq(jiffies, dev->mkey_lease_timeout)) { + /* Clear timeout and mkey protection field. */ + dev->mkey_lease_timeout = 0; + dev->mkeyprot = 0; + } + + /* + * M_Key checking depends on + * Portinfo:M_Key_protect_bits + */ + if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && dev->mkey != 0 && + dev->mkey != smp->mkey && + (smp->method == IB_MGMT_METHOD_SET || + (smp->method == IB_MGMT_METHOD_GET && + dev->mkeyprot >= 2))) { + if (dev->mkey_violations != 0xFFFF) + ++dev->mkey_violations; + if (dev->mkey_lease_timeout || + dev->mkey_lease_period == 0) { + ret = IB_MAD_RESULT_SUCCESS | + IB_MAD_RESULT_CONSUMED; + goto bail; + } + dev->mkey_lease_timeout = jiffies + + dev->mkey_lease_period * HZ; + /* Future: Generate a trap notice. */ + ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + goto bail; + } else if (dev->mkey_lease_timeout) + dev->mkey_lease_timeout = 0; + + switch (smp->method) { + case IB_MGMT_METHOD_GET: + switch (smp->attr_id) { + case IB_SMP_ATTR_NODE_DESC: + ret = recv_subn_get_nodedescription(smp, ibdev); + goto bail; + case IB_SMP_ATTR_NODE_INFO: + ret = recv_subn_get_nodeinfo(smp, ibdev, port_num); + goto bail; + case IB_SMP_ATTR_GUID_INFO: + ret = recv_subn_get_guidinfo(smp, ibdev); + goto bail; + case IB_SMP_ATTR_PORT_INFO: + ret = recv_subn_get_portinfo(smp, ibdev, port_num); + goto bail; + case IB_SMP_ATTR_PKEY_TABLE: + ret = recv_subn_get_pkeytable(smp, ibdev); + goto bail; + case IB_SMP_ATTR_SM_INFO: + if (dev->port_cap_flags & IB_PORT_SM_DISABLED) { + ret = IB_MAD_RESULT_SUCCESS | + IB_MAD_RESULT_CONSUMED; + goto bail; + } + if (dev->port_cap_flags & IB_PORT_SM) { + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + } + /* FALLTHROUGH */ + default: + smp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply(smp); + goto bail; + } + + case IB_MGMT_METHOD_SET: + switch (smp->attr_id) { + case IB_SMP_ATTR_GUID_INFO: + ret = recv_subn_set_guidinfo(smp, ibdev); + goto bail; + case IB_SMP_ATTR_PORT_INFO: + ret = recv_subn_set_portinfo(smp, ibdev, port_num); + goto bail; + case IB_SMP_ATTR_PKEY_TABLE: + ret = recv_subn_set_pkeytable(smp, ibdev); + goto bail; + case IB_SMP_ATTR_SM_INFO: + if (dev->port_cap_flags & IB_PORT_SM_DISABLED) { + ret = IB_MAD_RESULT_SUCCESS | + IB_MAD_RESULT_CONSUMED; + goto bail; + } + if (dev->port_cap_flags & IB_PORT_SM) { + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + } + /* FALLTHROUGH */ + default: + smp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply(smp); + goto bail; + } + + case IB_MGMT_METHOD_TRAP: + case IB_MGMT_METHOD_REPORT: + case IB_MGMT_METHOD_REPORT_RESP: + case IB_MGMT_METHOD_TRAP_REPRESS: + case IB_MGMT_METHOD_GET_RESP: + /* + * The ib_mad module will call us to process responses + * before checking for other consumers. + * Just tell the caller to process it normally. + */ + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + default: + smp->status |= IB_SMP_UNSUP_METHOD; + ret = reply(smp); + } + +bail: + return ret; +} + +static int process_perf(struct ib_device *ibdev, u8 port_num, + struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad; + int ret; + + *out_mad = *in_mad; + if (pmp->mad_hdr.class_version != 1) { + pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION; + ret = reply((struct ib_smp *) pmp); + goto bail; + } + + switch (pmp->mad_hdr.method) { + case IB_MGMT_METHOD_GET: + switch (pmp->mad_hdr.attr_id) { + case IB_PMA_CLASS_PORT_INFO: + ret = recv_pma_get_classportinfo(pmp); + goto bail; + case IB_PMA_PORT_SAMPLES_CONTROL: + ret = recv_pma_get_portsamplescontrol(pmp, ibdev, + port_num); + goto bail; + case IB_PMA_PORT_SAMPLES_RESULT: + ret = recv_pma_get_portsamplesresult(pmp, ibdev); + goto bail; + case IB_PMA_PORT_SAMPLES_RESULT_EXT: + ret = recv_pma_get_portsamplesresult_ext(pmp, + ibdev); + goto bail; + case IB_PMA_PORT_COUNTERS: + ret = recv_pma_get_portcounters(pmp, ibdev, + port_num); + goto bail; + case IB_PMA_PORT_COUNTERS_EXT: + ret = recv_pma_get_portcounters_ext(pmp, ibdev, + port_num); + goto bail; + default: + pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) pmp); + goto bail; + } + + case IB_MGMT_METHOD_SET: + switch (pmp->mad_hdr.attr_id) { + case IB_PMA_PORT_SAMPLES_CONTROL: + ret = recv_pma_set_portsamplescontrol(pmp, ibdev, + port_num); + goto bail; + case IB_PMA_PORT_COUNTERS: + ret = recv_pma_set_portcounters(pmp, ibdev, + port_num); + goto bail; + case IB_PMA_PORT_COUNTERS_EXT: + ret = recv_pma_set_portcounters_ext(pmp, ibdev, + port_num); + goto bail; + default: + pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) pmp); + goto bail; + } + + case IB_MGMT_METHOD_GET_RESP: + /* + * The ib_mad module will call us to process responses + * before checking for other consumers. + * Just tell the caller to process it normally. + */ + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + default: + pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD; + ret = reply((struct ib_smp *) pmp); + } + +bail: + return ret; +} + +/** + * ipath_process_mad - process an incoming MAD packet + * @ibdev: the infiniband device this packet came in on + * @mad_flags: MAD flags + * @port_num: the port number this packet came in on + * @in_wc: the work completion entry for this packet + * @in_grh: the global route header for this packet + * @in_mad: the incoming MAD + * @out_mad: any outgoing MAD reply + * + * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not + * interested in processing. + * + * Note that the verbs framework has already done the MAD sanity checks, + * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE + * MADs. + * + * This is called by the ib_mad module. + */ +int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + int ret; + + switch (in_mad->mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + ret = process_subn(ibdev, mad_flags, port_num, + in_mad, out_mad); + goto bail; + case IB_MGMT_CLASS_PERF_MGMT: + ret = process_perf(ibdev, port_num, in_mad, out_mad); + goto bail; + default: + ret = IB_MAD_RESULT_SUCCESS; + } + +bail: + return ret; +} diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c new file mode 100644 index 00000000..e7327422 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_mmap.c @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "ipath_verbs.h" + +/** + * ipath_release_mmap_info - free mmap info structure + * @ref: a pointer to the kref within struct ipath_mmap_info + */ +void ipath_release_mmap_info(struct kref *ref) +{ + struct ipath_mmap_info *ip = + container_of(ref, struct ipath_mmap_info, ref); + struct ipath_ibdev *dev = to_idev(ip->context->device); + + spin_lock_irq(&dev->pending_lock); + list_del(&ip->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + + vfree(ip->obj); + kfree(ip); +} + +/* + * open and close keep track of how many times the CQ is mapped, + * to avoid releasing it. + */ +static void ipath_vma_open(struct vm_area_struct *vma) +{ + struct ipath_mmap_info *ip = vma->vm_private_data; + + kref_get(&ip->ref); +} + +static void ipath_vma_close(struct vm_area_struct *vma) +{ + struct ipath_mmap_info *ip = vma->vm_private_data; + + kref_put(&ip->ref, ipath_release_mmap_info); +} + +static const struct vm_operations_struct ipath_vm_ops = { + .open = ipath_vma_open, + .close = ipath_vma_close, +}; + +/** + * ipath_mmap - create a new mmap region + * @context: the IB user context of the process making the mmap() call + * @vma: the VMA to be initialized + * Return zero if the mmap is OK. Otherwise, return an errno. + */ +int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct ipath_ibdev *dev = to_idev(context->device); + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long size = vma->vm_end - vma->vm_start; + struct ipath_mmap_info *ip, *pp; + int ret = -EINVAL; + + /* + * Search the device's list of objects waiting for a mmap call. + * Normally, this list is very short since a call to create a + * CQ, QP, or SRQ is soon followed by a call to mmap(). + */ + spin_lock_irq(&dev->pending_lock); + list_for_each_entry_safe(ip, pp, &dev->pending_mmaps, + pending_mmaps) { + /* Only the creator is allowed to mmap the object */ + if (context != ip->context || (__u64) offset != ip->offset) + continue; + /* Don't allow a mmap larger than the object. */ + if (size > ip->size) + break; + + list_del_init(&ip->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + + ret = remap_vmalloc_range(vma, ip->obj, 0); + if (ret) + goto done; + vma->vm_ops = &ipath_vm_ops; + vma->vm_private_data = ip; + ipath_vma_open(vma); + goto done; + } + spin_unlock_irq(&dev->pending_lock); +done: + return ret; +} + +/* + * Allocate information for ipath_mmap + */ +struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev, + u32 size, + struct ib_ucontext *context, + void *obj) { + struct ipath_mmap_info *ip; + + ip = kmalloc(sizeof *ip, GFP_KERNEL); + if (!ip) + goto bail; + + size = PAGE_ALIGN(size); + + spin_lock_irq(&dev->mmap_offset_lock); + if (dev->mmap_offset == 0) + dev->mmap_offset = PAGE_SIZE; + ip->offset = dev->mmap_offset; + dev->mmap_offset += size; + spin_unlock_irq(&dev->mmap_offset_lock); + + INIT_LIST_HEAD(&ip->pending_mmaps); + ip->size = size; + ip->context = context; + ip->obj = obj; + kref_init(&ip->ref); + +bail: + return ip; +} + +void ipath_update_mmap_info(struct ipath_ibdev *dev, + struct ipath_mmap_info *ip, + u32 size, void *obj) { + size = PAGE_ALIGN(size); + + spin_lock_irq(&dev->mmap_offset_lock); + if (dev->mmap_offset == 0) + dev->mmap_offset = PAGE_SIZE; + ip->offset = dev->mmap_offset; + dev->mmap_offset += size; + spin_unlock_irq(&dev->mmap_offset_lock); + + ip->size = size; + ip->obj = obj; +} diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c new file mode 100644 index 00000000..e346d389 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -0,0 +1,430 @@ +/* + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include +#include +#include + +#include "ipath_verbs.h" + +/* Fast memory region */ +struct ipath_fmr { + struct ib_fmr ibfmr; + u8 page_shift; + struct ipath_mregion mr; /* must be last */ +}; + +static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct ipath_fmr, ibfmr); +} + +/** + * ipath_get_dma_mr - get a DMA memory region + * @pd: protection domain for this memory region + * @acc: access flags + * + * Returns the memory region on success, otherwise returns an errno. + * Note that all DMA addresses should be created via the + * struct ib_dma_mapping_ops functions (see ipath_dma.c). + */ +struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct ipath_mr *mr; + struct ib_mr *ret; + + mr = kzalloc(sizeof *mr, GFP_KERNEL); + if (!mr) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + mr->mr.access_flags = acc; + ret = &mr->ibmr; + +bail: + return ret; +} + +static struct ipath_mr *alloc_mr(int count, + struct ipath_lkey_table *lk_table) +{ + struct ipath_mr *mr; + int m, i = 0; + + /* Allocate struct plus pointers to first level page tables. */ + m = (count + IPATH_SEGSZ - 1) / IPATH_SEGSZ; + mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); + if (!mr) + goto done; + + /* Allocate first level page tables. */ + for (; i < m; i++) { + mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL); + if (!mr->mr.map[i]) + goto bail; + } + mr->mr.mapsz = m; + + /* + * ib_reg_phys_mr() will initialize mr->ibmr except for + * lkey and rkey. + */ + if (!ipath_alloc_lkey(lk_table, &mr->mr)) + goto bail; + mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey; + + goto done; + +bail: + while (i) { + i--; + kfree(mr->mr.map[i]); + } + kfree(mr); + mr = NULL; + +done: + return mr; +} + +/** + * ipath_reg_phys_mr - register a physical memory region + * @pd: protection domain for this memory region + * @buffer_list: pointer to the list of physical buffers to register + * @num_phys_buf: the number of physical buffers to register + * @iova_start: the starting address passed over IB which maps to this MR + * + * Returns the memory region on success, otherwise returns an errno. + */ +struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, int acc, u64 *iova_start) +{ + struct ipath_mr *mr; + int n, m, i; + struct ib_mr *ret; + + mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table); + if (mr == NULL) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + mr->mr.pd = pd; + mr->mr.user_base = *iova_start; + mr->mr.iova = *iova_start; + mr->mr.length = 0; + mr->mr.offset = 0; + mr->mr.access_flags = acc; + mr->mr.max_segs = num_phys_buf; + mr->umem = NULL; + + m = 0; + n = 0; + for (i = 0; i < num_phys_buf; i++) { + mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr; + mr->mr.map[m]->segs[n].length = buffer_list[i].size; + mr->mr.length += buffer_list[i].size; + n++; + if (n == IPATH_SEGSZ) { + m++; + n = 0; + } + } + + ret = &mr->ibmr; + +bail: + return ret; +} + +/** + * ipath_reg_user_mr - register a userspace memory region + * @pd: protection domain for this memory region + * @start: starting userspace address + * @length: length of region to register + * @virt_addr: virtual address to use (from HCA's point of view) + * @mr_access_flags: access flags for this memory region + * @udata: unused by the InfiniPath driver + * + * Returns the memory region on success, otherwise returns an errno. + */ +struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, + struct ib_udata *udata) +{ + struct ipath_mr *mr; + struct ib_umem *umem; + struct ib_umem_chunk *chunk; + int n, m, i; + struct ib_mr *ret; + + if (length == 0) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + + umem = ib_umem_get(pd->uobject->context, start, length, + mr_access_flags, 0); + if (IS_ERR(umem)) + return (void *) umem; + + n = 0; + list_for_each_entry(chunk, &umem->chunk_list, list) + n += chunk->nents; + + mr = alloc_mr(n, &to_idev(pd->device)->lk_table); + if (!mr) { + ret = ERR_PTR(-ENOMEM); + ib_umem_release(umem); + goto bail; + } + + mr->mr.pd = pd; + mr->mr.user_base = start; + mr->mr.iova = virt_addr; + mr->mr.length = length; + mr->mr.offset = umem->offset; + mr->mr.access_flags = mr_access_flags; + mr->mr.max_segs = n; + mr->umem = umem; + + m = 0; + n = 0; + list_for_each_entry(chunk, &umem->chunk_list, list) { + for (i = 0; i < chunk->nents; i++) { + void *vaddr; + + vaddr = page_address(sg_page(&chunk->page_list[i])); + if (!vaddr) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + mr->mr.map[m]->segs[n].vaddr = vaddr; + mr->mr.map[m]->segs[n].length = umem->page_size; + n++; + if (n == IPATH_SEGSZ) { + m++; + n = 0; + } + } + } + ret = &mr->ibmr; + +bail: + return ret; +} + +/** + * ipath_dereg_mr - unregister and free a memory region + * @ibmr: the memory region to free + * + * Returns 0 on success. + * + * Note that this is called to free MRs created by ipath_get_dma_mr() + * or ipath_reg_user_mr(). + */ +int ipath_dereg_mr(struct ib_mr *ibmr) +{ + struct ipath_mr *mr = to_imr(ibmr); + int i; + + ipath_free_lkey(&to_idev(ibmr->device)->lk_table, ibmr->lkey); + i = mr->mr.mapsz; + while (i) { + i--; + kfree(mr->mr.map[i]); + } + + if (mr->umem) + ib_umem_release(mr->umem); + + kfree(mr); + return 0; +} + +/** + * ipath_alloc_fmr - allocate a fast memory region + * @pd: the protection domain for this memory region + * @mr_access_flags: access flags for this memory region + * @fmr_attr: fast memory region attributes + * + * Returns the memory region on success, otherwise returns an errno. + */ +struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags, + struct ib_fmr_attr *fmr_attr) +{ + struct ipath_fmr *fmr; + int m, i = 0; + struct ib_fmr *ret; + + /* Allocate struct plus pointers to first level page tables. */ + m = (fmr_attr->max_pages + IPATH_SEGSZ - 1) / IPATH_SEGSZ; + fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); + if (!fmr) + goto bail; + + /* Allocate first level page tables. */ + for (; i < m; i++) { + fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0], + GFP_KERNEL); + if (!fmr->mr.map[i]) + goto bail; + } + fmr->mr.mapsz = m; + + /* + * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & + * rkey. + */ + if (!ipath_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr)) + goto bail; + fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mr.lkey; + /* + * Resources are allocated but no valid mapping (RKEY can't be + * used). + */ + fmr->mr.pd = pd; + fmr->mr.user_base = 0; + fmr->mr.iova = 0; + fmr->mr.length = 0; + fmr->mr.offset = 0; + fmr->mr.access_flags = mr_access_flags; + fmr->mr.max_segs = fmr_attr->max_pages; + fmr->page_shift = fmr_attr->page_shift; + + ret = &fmr->ibfmr; + goto done; + +bail: + while (i) + kfree(fmr->mr.map[--i]); + kfree(fmr); + ret = ERR_PTR(-ENOMEM); + +done: + return ret; +} + +/** + * ipath_map_phys_fmr - set up a fast memory region + * @ibmfr: the fast memory region to set up + * @page_list: the list of pages to associate with the fast memory region + * @list_len: the number of pages to associate with the fast memory region + * @iova: the virtual address of the start of the fast memory region + * + * This may be called from interrupt context. + */ + +int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list, + int list_len, u64 iova) +{ + struct ipath_fmr *fmr = to_ifmr(ibfmr); + struct ipath_lkey_table *rkt; + unsigned long flags; + int m, n, i; + u32 ps; + int ret; + + if (list_len > fmr->mr.max_segs) { + ret = -EINVAL; + goto bail; + } + rkt = &to_idev(ibfmr->device)->lk_table; + spin_lock_irqsave(&rkt->lock, flags); + fmr->mr.user_base = iova; + fmr->mr.iova = iova; + ps = 1 << fmr->page_shift; + fmr->mr.length = list_len * ps; + m = 0; + n = 0; + ps = 1 << fmr->page_shift; + for (i = 0; i < list_len; i++) { + fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i]; + fmr->mr.map[m]->segs[n].length = ps; + if (++n == IPATH_SEGSZ) { + m++; + n = 0; + } + } + spin_unlock_irqrestore(&rkt->lock, flags); + ret = 0; + +bail: + return ret; +} + +/** + * ipath_unmap_fmr - unmap fast memory regions + * @fmr_list: the list of fast memory regions to unmap + * + * Returns 0 on success. + */ +int ipath_unmap_fmr(struct list_head *fmr_list) +{ + struct ipath_fmr *fmr; + struct ipath_lkey_table *rkt; + unsigned long flags; + + list_for_each_entry(fmr, fmr_list, ibfmr.list) { + rkt = &to_idev(fmr->ibfmr.device)->lk_table; + spin_lock_irqsave(&rkt->lock, flags); + fmr->mr.user_base = 0; + fmr->mr.iova = 0; + fmr->mr.length = 0; + spin_unlock_irqrestore(&rkt->lock, flags); + } + return 0; +} + +/** + * ipath_dealloc_fmr - deallocate a fast memory region + * @ibfmr: the fast memory region to deallocate + * + * Returns 0 on success. + */ +int ipath_dealloc_fmr(struct ib_fmr *ibfmr) +{ + struct ipath_fmr *fmr = to_ifmr(ibfmr); + int i; + + ipath_free_lkey(&to_idev(ibfmr->device)->lk_table, ibfmr->lkey); + i = fmr->mr.mapsz; + while (i) + kfree(fmr->mr.map[--i]); + kfree(fmr); + return 0; +} diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c new file mode 100644 index 00000000..0857a9c3 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -0,0 +1,1080 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include + +#include "ipath_verbs.h" +#include "ipath_kernel.h" + +#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) +#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) +#define mk_qpn(qpt, map, off) (((map) - (qpt)->map) * BITS_PER_PAGE + \ + (off)) +#define find_next_offset(map, off) find_next_zero_bit((map)->page, \ + BITS_PER_PAGE, off) + +/* + * Convert the AETH credit code into the number of credits. + */ +static u32 credit_table[31] = { + 0, /* 0 */ + 1, /* 1 */ + 2, /* 2 */ + 3, /* 3 */ + 4, /* 4 */ + 6, /* 5 */ + 8, /* 6 */ + 12, /* 7 */ + 16, /* 8 */ + 24, /* 9 */ + 32, /* A */ + 48, /* B */ + 64, /* C */ + 96, /* D */ + 128, /* E */ + 192, /* F */ + 256, /* 10 */ + 384, /* 11 */ + 512, /* 12 */ + 768, /* 13 */ + 1024, /* 14 */ + 1536, /* 15 */ + 2048, /* 16 */ + 3072, /* 17 */ + 4096, /* 18 */ + 6144, /* 19 */ + 8192, /* 1A */ + 12288, /* 1B */ + 16384, /* 1C */ + 24576, /* 1D */ + 32768 /* 1E */ +}; + + +static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map) +{ + unsigned long page = get_zeroed_page(GFP_KERNEL); + unsigned long flags; + + /* + * Free the page if someone raced with us installing it. + */ + + spin_lock_irqsave(&qpt->lock, flags); + if (map->page) + free_page(page); + else + map->page = (void *)page; + spin_unlock_irqrestore(&qpt->lock, flags); +} + + +static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type) +{ + u32 i, offset, max_scan, qpn; + struct qpn_map *map; + u32 ret = -1; + + if (type == IB_QPT_SMI) + ret = 0; + else if (type == IB_QPT_GSI) + ret = 1; + + if (ret != -1) { + map = &qpt->map[0]; + if (unlikely(!map->page)) { + get_map_page(qpt, map); + if (unlikely(!map->page)) { + ret = -ENOMEM; + goto bail; + } + } + if (!test_and_set_bit(ret, map->page)) + atomic_dec(&map->n_free); + else + ret = -EBUSY; + goto bail; + } + + qpn = qpt->last + 1; + if (qpn >= QPN_MAX) + qpn = 2; + offset = qpn & BITS_PER_PAGE_MASK; + map = &qpt->map[qpn / BITS_PER_PAGE]; + max_scan = qpt->nmaps - !offset; + for (i = 0;;) { + if (unlikely(!map->page)) { + get_map_page(qpt, map); + if (unlikely(!map->page)) + break; + } + if (likely(atomic_read(&map->n_free))) { + do { + if (!test_and_set_bit(offset, map->page)) { + atomic_dec(&map->n_free); + qpt->last = qpn; + ret = qpn; + goto bail; + } + offset = find_next_offset(map, offset); + qpn = mk_qpn(qpt, map, offset); + /* + * This test differs from alloc_pidmap(). + * If find_next_offset() does find a zero + * bit, we don't need to check for QPN + * wrapping around past our starting QPN. + * We just need to be sure we don't loop + * forever. + */ + } while (offset < BITS_PER_PAGE && qpn < QPN_MAX); + } + /* + * In order to keep the number of pages allocated to a + * minimum, we scan the all existing pages before increasing + * the size of the bitmap table. + */ + if (++i > max_scan) { + if (qpt->nmaps == QPNMAP_ENTRIES) + break; + map = &qpt->map[qpt->nmaps++]; + offset = 0; + } else if (map < &qpt->map[qpt->nmaps]) { + ++map; + offset = 0; + } else { + map = &qpt->map[0]; + offset = 2; + } + qpn = mk_qpn(qpt, map, offset); + } + + ret = -ENOMEM; + +bail: + return ret; +} + +static void free_qpn(struct ipath_qp_table *qpt, u32 qpn) +{ + struct qpn_map *map; + + map = qpt->map + qpn / BITS_PER_PAGE; + if (map->page) + clear_bit(qpn & BITS_PER_PAGE_MASK, map->page); + atomic_inc(&map->n_free); +} + +/** + * ipath_alloc_qpn - allocate a QP number + * @qpt: the QP table + * @qp: the QP + * @type: the QP type (IB_QPT_SMI and IB_QPT_GSI are special) + * + * Allocate the next available QPN and put the QP into the hash table. + * The hash table holds a reference to the QP. + */ +static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp, + enum ib_qp_type type) +{ + unsigned long flags; + int ret; + + ret = alloc_qpn(qpt, type); + if (ret < 0) + goto bail; + qp->ibqp.qp_num = ret; + + /* Add the QP to the hash table. */ + spin_lock_irqsave(&qpt->lock, flags); + + ret %= qpt->max; + qp->next = qpt->table[ret]; + qpt->table[ret] = qp; + atomic_inc(&qp->refcount); + + spin_unlock_irqrestore(&qpt->lock, flags); + ret = 0; + +bail: + return ret; +} + +/** + * ipath_free_qp - remove a QP from the QP table + * @qpt: the QP table + * @qp: the QP to remove + * + * Remove the QP from the table so it can't be found asynchronously by + * the receive interrupt routine. + */ +static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) +{ + struct ipath_qp *q, **qpp; + unsigned long flags; + + spin_lock_irqsave(&qpt->lock, flags); + + /* Remove QP from the hash table. */ + qpp = &qpt->table[qp->ibqp.qp_num % qpt->max]; + for (; (q = *qpp) != NULL; qpp = &q->next) { + if (q == qp) { + *qpp = qp->next; + qp->next = NULL; + atomic_dec(&qp->refcount); + break; + } + } + + spin_unlock_irqrestore(&qpt->lock, flags); +} + +/** + * ipath_free_all_qps - check for QPs still in use + * @qpt: the QP table to empty + * + * There should not be any QPs still in use. + * Free memory for table. + */ +unsigned ipath_free_all_qps(struct ipath_qp_table *qpt) +{ + unsigned long flags; + struct ipath_qp *qp; + u32 n, qp_inuse = 0; + + spin_lock_irqsave(&qpt->lock, flags); + for (n = 0; n < qpt->max; n++) { + qp = qpt->table[n]; + qpt->table[n] = NULL; + + for (; qp; qp = qp->next) + qp_inuse++; + } + spin_unlock_irqrestore(&qpt->lock, flags); + + for (n = 0; n < ARRAY_SIZE(qpt->map); n++) + if (qpt->map[n].page) + free_page((unsigned long) qpt->map[n].page); + return qp_inuse; +} + +/** + * ipath_lookup_qpn - return the QP with the given QPN + * @qpt: the QP table + * @qpn: the QP number to look up + * + * The caller is responsible for decrementing the QP reference count + * when done. + */ +struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn) +{ + unsigned long flags; + struct ipath_qp *qp; + + spin_lock_irqsave(&qpt->lock, flags); + + for (qp = qpt->table[qpn % qpt->max]; qp; qp = qp->next) { + if (qp->ibqp.qp_num == qpn) { + atomic_inc(&qp->refcount); + break; + } + } + + spin_unlock_irqrestore(&qpt->lock, flags); + return qp; +} + +/** + * ipath_reset_qp - initialize the QP state to the reset state + * @qp: the QP to reset + * @type: the QP type + */ +static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) +{ + qp->remote_qpn = 0; + qp->qkey = 0; + qp->qp_access_flags = 0; + atomic_set(&qp->s_dma_busy, 0); + qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; + qp->s_hdrwords = 0; + qp->s_wqe = NULL; + qp->s_pkt_delay = 0; + qp->s_draining = 0; + qp->s_psn = 0; + qp->r_psn = 0; + qp->r_msn = 0; + if (type == IB_QPT_RC) { + qp->s_state = IB_OPCODE_RC_SEND_LAST; + qp->r_state = IB_OPCODE_RC_SEND_LAST; + } else { + qp->s_state = IB_OPCODE_UC_SEND_LAST; + qp->r_state = IB_OPCODE_UC_SEND_LAST; + } + qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; + qp->r_nak_state = 0; + qp->r_aflags = 0; + qp->r_flags = 0; + qp->s_rnr_timeout = 0; + qp->s_head = 0; + qp->s_tail = 0; + qp->s_cur = 0; + qp->s_last = 0; + qp->s_ssn = 1; + qp->s_lsn = 0; + memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); + qp->r_head_ack_queue = 0; + qp->s_tail_ack_queue = 0; + qp->s_num_rd_atomic = 0; + if (qp->r_rq.wq) { + qp->r_rq.wq->head = 0; + qp->r_rq.wq->tail = 0; + } +} + +/** + * ipath_error_qp - put a QP into the error state + * @qp: the QP to put into the error state + * @err: the receive completion error to signal if a RWQE is active + * + * Flushes both send and receive work queues. + * Returns true if last WQE event should be generated. + * The QP s_lock should be held and interrupts disabled. + * If we are already in error state, just return. + */ + +int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) +{ + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + struct ib_wc wc; + int ret = 0; + + if (qp->state == IB_QPS_ERR) + goto bail; + + qp->state = IB_QPS_ERR; + + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->timerwait)) + list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); + spin_unlock(&dev->pending_lock); + + /* Schedule the sending tasklet to drain the send work queue. */ + if (qp->s_last != qp->s_head) + ipath_schedule_send(qp); + + memset(&wc, 0, sizeof(wc)); + wc.qp = &qp->ibqp; + wc.opcode = IB_WC_RECV; + + if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) { + wc.wr_id = qp->r_wr_id; + wc.status = err; + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + } + wc.status = IB_WC_WR_FLUSH_ERR; + + if (qp->r_rq.wq) { + struct ipath_rwq *wq; + u32 head; + u32 tail; + + spin_lock(&qp->r_rq.lock); + + /* sanity check pointers before trusting them */ + wq = qp->r_rq.wq; + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + while (tail != head) { + wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; + if (++tail >= qp->r_rq.size) + tail = 0; + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + } + wq->tail = tail; + + spin_unlock(&qp->r_rq.lock); + } else if (qp->ibqp.event_handler) + ret = 1; + +bail: + return ret; +} + +/** + * ipath_modify_qp - modify the attributes of a queue pair + * @ibqp: the queue pair who's attributes we're modifying + * @attr: the new attributes + * @attr_mask: the mask of attributes to modify + * @udata: user data for ipathverbs.so + * + * Returns 0 on success, otherwise returns an errno. + */ +int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct ipath_ibdev *dev = to_idev(ibqp->device); + struct ipath_qp *qp = to_iqp(ibqp); + enum ib_qp_state cur_state, new_state; + int lastwqe = 0; + int ret; + + spin_lock_irq(&qp->s_lock); + + cur_state = attr_mask & IB_QP_CUR_STATE ? + attr->cur_qp_state : qp->state; + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, + attr_mask)) + goto inval; + + if (attr_mask & IB_QP_AV) { + if (attr->ah_attr.dlid == 0 || + attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE) + goto inval; + + if ((attr->ah_attr.ah_flags & IB_AH_GRH) && + (attr->ah_attr.grh.sgid_index > 1)) + goto inval; + } + + if (attr_mask & IB_QP_PKEY_INDEX) + if (attr->pkey_index >= ipath_get_npkeys(dev->dd)) + goto inval; + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + if (attr->min_rnr_timer > 31) + goto inval; + + if (attr_mask & IB_QP_PORT) + if (attr->port_num == 0 || + attr->port_num > ibqp->device->phys_port_cnt) + goto inval; + + /* + * don't allow invalid Path MTU values or greater than 2048 + * unless we are configured for a 4KB MTU + */ + if ((attr_mask & IB_QP_PATH_MTU) && + (ib_mtu_enum_to_int(attr->path_mtu) == -1 || + (attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096))) + goto inval; + + if (attr_mask & IB_QP_PATH_MIG_STATE) + if (attr->path_mig_state != IB_MIG_MIGRATED && + attr->path_mig_state != IB_MIG_REARM) + goto inval; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC) + goto inval; + + switch (new_state) { + case IB_QPS_RESET: + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->timerwait)) + list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~IPATH_S_ANY_WAIT; + spin_unlock_irq(&qp->s_lock); + /* Stop the sending tasklet */ + tasklet_kill(&qp->s_task); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + spin_lock_irq(&qp->s_lock); + } + ipath_reset_qp(qp, ibqp->qp_type); + break; + + case IB_QPS_SQD: + qp->s_draining = qp->s_last != qp->s_cur; + qp->state = new_state; + break; + + case IB_QPS_SQE: + if (qp->ibqp.qp_type == IB_QPT_RC) + goto inval; + qp->state = new_state; + break; + + case IB_QPS_ERR: + lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); + break; + + default: + qp->state = new_state; + break; + } + + if (attr_mask & IB_QP_PKEY_INDEX) + qp->s_pkey_index = attr->pkey_index; + + if (attr_mask & IB_QP_DEST_QPN) + qp->remote_qpn = attr->dest_qp_num; + + if (attr_mask & IB_QP_SQ_PSN) { + qp->s_psn = qp->s_next_psn = attr->sq_psn; + qp->s_last_psn = qp->s_next_psn - 1; + } + + if (attr_mask & IB_QP_RQ_PSN) + qp->r_psn = attr->rq_psn; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->qp_access_flags = attr->qp_access_flags; + + if (attr_mask & IB_QP_AV) { + qp->remote_ah_attr = attr->ah_attr; + qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate); + } + + if (attr_mask & IB_QP_PATH_MTU) + qp->path_mtu = attr->path_mtu; + + if (attr_mask & IB_QP_RETRY_CNT) + qp->s_retry = qp->s_retry_cnt = attr->retry_cnt; + + if (attr_mask & IB_QP_RNR_RETRY) { + qp->s_rnr_retry = attr->rnr_retry; + if (qp->s_rnr_retry > 7) + qp->s_rnr_retry = 7; + qp->s_rnr_retry_cnt = qp->s_rnr_retry; + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + qp->r_min_rnr_timer = attr->min_rnr_timer; + + if (attr_mask & IB_QP_TIMEOUT) + qp->timeout = attr->timeout; + + if (attr_mask & IB_QP_QKEY) + qp->qkey = attr->qkey; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->r_max_rd_atomic = attr->max_dest_rd_atomic; + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + qp->s_max_rd_atomic = attr->max_rd_atomic; + + spin_unlock_irq(&qp->s_lock); + + if (lastwqe) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + ret = 0; + goto bail; + +inval: + spin_unlock_irq(&qp->s_lock); + ret = -EINVAL; + +bail: + return ret; +} + +int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr) +{ + struct ipath_qp *qp = to_iqp(ibqp); + + attr->qp_state = qp->state; + attr->cur_qp_state = attr->qp_state; + attr->path_mtu = qp->path_mtu; + attr->path_mig_state = 0; + attr->qkey = qp->qkey; + attr->rq_psn = qp->r_psn; + attr->sq_psn = qp->s_next_psn; + attr->dest_qp_num = qp->remote_qpn; + attr->qp_access_flags = qp->qp_access_flags; + attr->cap.max_send_wr = qp->s_size - 1; + attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1; + attr->cap.max_send_sge = qp->s_max_sge; + attr->cap.max_recv_sge = qp->r_rq.max_sge; + attr->cap.max_inline_data = 0; + attr->ah_attr = qp->remote_ah_attr; + memset(&attr->alt_ah_attr, 0, sizeof(attr->alt_ah_attr)); + attr->pkey_index = qp->s_pkey_index; + attr->alt_pkey_index = 0; + attr->en_sqd_async_notify = 0; + attr->sq_draining = qp->s_draining; + attr->max_rd_atomic = qp->s_max_rd_atomic; + attr->max_dest_rd_atomic = qp->r_max_rd_atomic; + attr->min_rnr_timer = qp->r_min_rnr_timer; + attr->port_num = 1; + attr->timeout = qp->timeout; + attr->retry_cnt = qp->s_retry_cnt; + attr->rnr_retry = qp->s_rnr_retry_cnt; + attr->alt_port_num = 0; + attr->alt_timeout = 0; + + init_attr->event_handler = qp->ibqp.event_handler; + init_attr->qp_context = qp->ibqp.qp_context; + init_attr->send_cq = qp->ibqp.send_cq; + init_attr->recv_cq = qp->ibqp.recv_cq; + init_attr->srq = qp->ibqp.srq; + init_attr->cap = attr->cap; + if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR) + init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; + else + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->qp_type = qp->ibqp.qp_type; + init_attr->port_num = 1; + return 0; +} + +/** + * ipath_compute_aeth - compute the AETH (syndrome + MSN) + * @qp: the queue pair to compute the AETH for + * + * Returns the AETH. + */ +__be32 ipath_compute_aeth(struct ipath_qp *qp) +{ + u32 aeth = qp->r_msn & IPATH_MSN_MASK; + + if (qp->ibqp.srq) { + /* + * Shared receive queues don't generate credits. + * Set the credit field to the invalid value. + */ + aeth |= IPATH_AETH_CREDIT_INVAL << IPATH_AETH_CREDIT_SHIFT; + } else { + u32 min, max, x; + u32 credits; + struct ipath_rwq *wq = qp->r_rq.wq; + u32 head; + u32 tail; + + /* sanity check pointers before trusting them */ + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + /* + * Compute the number of credits available (RWQEs). + * XXX Not holding the r_rq.lock here so there is a small + * chance that the pair of reads are not atomic. + */ + credits = head - tail; + if ((int)credits < 0) + credits += qp->r_rq.size; + /* + * Binary search the credit table to find the code to + * use. + */ + min = 0; + max = 31; + for (;;) { + x = (min + max) / 2; + if (credit_table[x] == credits) + break; + if (credit_table[x] > credits) + max = x; + else if (min == x) + break; + else + min = x; + } + aeth |= x << IPATH_AETH_CREDIT_SHIFT; + } + return cpu_to_be32(aeth); +} + +/** + * ipath_create_qp - create a queue pair for a device + * @ibpd: the protection domain who's device we create the queue pair for + * @init_attr: the attributes of the queue pair + * @udata: unused by InfiniPath + * + * Returns the queue pair on success, otherwise returns an errno. + * + * Called by the ib_create_qp() core verbs function. + */ +struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct ipath_qp *qp; + int err; + struct ipath_swqe *swq = NULL; + struct ipath_ibdev *dev; + size_t sz; + size_t sg_list_sz; + struct ib_qp *ret; + + if (init_attr->create_flags) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + + if (init_attr->cap.max_send_sge > ib_ipath_max_sges || + init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + + /* Check receive queue parameters if no SRQ is specified. */ + if (!init_attr->srq) { + if (init_attr->cap.max_recv_sge > ib_ipath_max_sges || + init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + if (init_attr->cap.max_send_sge + + init_attr->cap.max_send_wr + + init_attr->cap.max_recv_sge + + init_attr->cap.max_recv_wr == 0) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + } + + switch (init_attr->qp_type) { + case IB_QPT_UC: + case IB_QPT_RC: + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + sz = sizeof(struct ipath_sge) * + init_attr->cap.max_send_sge + + sizeof(struct ipath_swqe); + swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); + if (swq == NULL) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + sz = sizeof(*qp); + sg_list_sz = 0; + if (init_attr->srq) { + struct ipath_srq *srq = to_isrq(init_attr->srq); + + if (srq->rq.max_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (srq->rq.max_sge - 1); + } else if (init_attr->cap.max_recv_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (init_attr->cap.max_recv_sge - 1); + qp = kmalloc(sz + sg_list_sz, GFP_KERNEL); + if (!qp) { + ret = ERR_PTR(-ENOMEM); + goto bail_swq; + } + if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD || + init_attr->qp_type == IB_QPT_SMI || + init_attr->qp_type == IB_QPT_GSI)) { + qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL); + if (!qp->r_ud_sg_list) { + ret = ERR_PTR(-ENOMEM); + goto bail_qp; + } + } else + qp->r_ud_sg_list = NULL; + if (init_attr->srq) { + sz = 0; + qp->r_rq.size = 0; + qp->r_rq.max_sge = 0; + qp->r_rq.wq = NULL; + init_attr->cap.max_recv_wr = 0; + init_attr->cap.max_recv_sge = 0; + } else { + qp->r_rq.size = init_attr->cap.max_recv_wr + 1; + qp->r_rq.max_sge = init_attr->cap.max_recv_sge; + sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + + sizeof(struct ipath_rwqe); + qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + + qp->r_rq.size * sz); + if (!qp->r_rq.wq) { + ret = ERR_PTR(-ENOMEM); + goto bail_sg_list; + } + } + + /* + * ib_create_qp() will initialize qp->ibqp + * except for qp->ibqp.qp_num. + */ + spin_lock_init(&qp->s_lock); + spin_lock_init(&qp->r_rq.lock); + atomic_set(&qp->refcount, 0); + init_waitqueue_head(&qp->wait); + init_waitqueue_head(&qp->wait_dma); + tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp); + INIT_LIST_HEAD(&qp->piowait); + INIT_LIST_HEAD(&qp->timerwait); + qp->state = IB_QPS_RESET; + qp->s_wq = swq; + qp->s_size = init_attr->cap.max_send_wr + 1; + qp->s_max_sge = init_attr->cap.max_send_sge; + if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) + qp->s_flags = IPATH_S_SIGNAL_REQ_WR; + else + qp->s_flags = 0; + dev = to_idev(ibpd->device); + err = ipath_alloc_qpn(&dev->qp_table, qp, + init_attr->qp_type); + if (err) { + ret = ERR_PTR(err); + vfree(qp->r_rq.wq); + goto bail_sg_list; + } + qp->ip = NULL; + qp->s_tx = NULL; + ipath_reset_qp(qp, init_attr->qp_type); + break; + + default: + /* Don't support raw QPs */ + ret = ERR_PTR(-ENOSYS); + goto bail; + } + + init_attr->cap.max_inline_data = 0; + + /* + * Return the address of the RWQ as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + if (!qp->r_rq.wq) { + __u64 offset = 0; + + err = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } else { + u32 s = sizeof(struct ipath_rwq) + + qp->r_rq.size * sz; + + qp->ip = + ipath_create_mmap_info(dev, s, + ibpd->uobject->context, + qp->r_rq.wq); + if (!qp->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + err = ib_copy_to_udata(udata, &(qp->ip->offset), + sizeof(qp->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } + } + + spin_lock(&dev->n_qps_lock); + if (dev->n_qps_allocated == ib_ipath_max_qps) { + spin_unlock(&dev->n_qps_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_qps_allocated++; + spin_unlock(&dev->n_qps_lock); + + if (qp->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + ret = &qp->ibqp; + goto bail; + +bail_ip: + if (qp->ip) + kref_put(&qp->ip->ref, ipath_release_mmap_info); + else + vfree(qp->r_rq.wq); + ipath_free_qp(&dev->qp_table, qp); + free_qpn(&dev->qp_table, qp->ibqp.qp_num); +bail_sg_list: + kfree(qp->r_ud_sg_list); +bail_qp: + kfree(qp); +bail_swq: + vfree(swq); +bail: + return ret; +} + +/** + * ipath_destroy_qp - destroy a queue pair + * @ibqp: the queue pair to destroy + * + * Returns 0 on success. + * + * Note that this can be called while the QP is actively sending or + * receiving! + */ +int ipath_destroy_qp(struct ib_qp *ibqp) +{ + struct ipath_qp *qp = to_iqp(ibqp); + struct ipath_ibdev *dev = to_idev(ibqp->device); + + /* Make sure HW and driver activity is stopped. */ + spin_lock_irq(&qp->s_lock); + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->timerwait)) + list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~IPATH_S_ANY_WAIT; + spin_unlock_irq(&qp->s_lock); + /* Stop the sending tasklet */ + tasklet_kill(&qp->s_task); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + } else + spin_unlock_irq(&qp->s_lock); + + ipath_free_qp(&dev->qp_table, qp); + + if (qp->s_tx) { + atomic_dec(&qp->refcount); + if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) + kfree(qp->s_tx->txreq.map_addr); + spin_lock_irq(&dev->pending_lock); + list_add(&qp->s_tx->txreq.list, &dev->txreq_free); + spin_unlock_irq(&dev->pending_lock); + qp->s_tx = NULL; + } + + wait_event(qp->wait, !atomic_read(&qp->refcount)); + + /* all user's cleaned up, mark it available */ + free_qpn(&dev->qp_table, qp->ibqp.qp_num); + spin_lock(&dev->n_qps_lock); + dev->n_qps_allocated--; + spin_unlock(&dev->n_qps_lock); + + if (qp->ip) + kref_put(&qp->ip->ref, ipath_release_mmap_info); + else + vfree(qp->r_rq.wq); + kfree(qp->r_ud_sg_list); + vfree(qp->s_wq); + kfree(qp); + return 0; +} + +/** + * ipath_init_qp_table - initialize the QP table for a device + * @idev: the device who's QP table we're initializing + * @size: the size of the QP table + * + * Returns 0 on success, otherwise returns an errno. + */ +int ipath_init_qp_table(struct ipath_ibdev *idev, int size) +{ + int i; + int ret; + + idev->qp_table.last = 1; /* QPN 0 and 1 are special. */ + idev->qp_table.max = size; + idev->qp_table.nmaps = 1; + idev->qp_table.table = kzalloc(size * sizeof(*idev->qp_table.table), + GFP_KERNEL); + if (idev->qp_table.table == NULL) { + ret = -ENOMEM; + goto bail; + } + + for (i = 0; i < ARRAY_SIZE(idev->qp_table.map); i++) { + atomic_set(&idev->qp_table.map[i].n_free, BITS_PER_PAGE); + idev->qp_table.map[i].page = NULL; + } + + ret = 0; + +bail: + return ret; +} + +/** + * ipath_get_credit - flush the send work queue of a QP + * @qp: the qp who's send work queue to flush + * @aeth: the Acknowledge Extended Transport Header + * + * The QP s_lock should be held. + */ +void ipath_get_credit(struct ipath_qp *qp, u32 aeth) +{ + u32 credit = (aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK; + + /* + * If the credit is invalid, we can send + * as many packets as we like. Otherwise, we have to + * honor the credit field. + */ + if (credit == IPATH_AETH_CREDIT_INVAL) + qp->s_lsn = (u32) -1; + else if (qp->s_lsn != (u32) -1) { + /* Compute new LSN (i.e., MSN + credit) */ + credit = (aeth + credit_table[credit]) & IPATH_MSN_MASK; + if (ipath_cmp24(credit, qp->s_lsn) > 0) + qp->s_lsn = credit; + } + + /* Restart sending if it was blocked due to lack of credits. */ + if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) && + qp->s_cur != qp->s_head && + (qp->s_lsn == (u32) -1 || + ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn, + qp->s_lsn + 1) <= 0)) + ipath_schedule_send(qp); +} diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c new file mode 100644 index 00000000..79b3dbc9 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -0,0 +1,1969 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "ipath_verbs.h" +#include "ipath_kernel.h" + +/* cut down ridiculously long IB macro names */ +#define OP(x) IB_OPCODE_RC_##x + +static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe, + u32 psn, u32 pmtu) +{ + u32 len; + + len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu; + ss->sge = wqe->sg_list[0]; + ss->sg_list = wqe->sg_list + 1; + ss->num_sge = wqe->wr.num_sge; + ipath_skip_sge(ss, len); + return wqe->length - len; +} + +/** + * ipath_init_restart- initialize the qp->s_sge after a restart + * @qp: the QP who's SGE we're restarting + * @wqe: the work queue to initialize the QP's SGE from + * + * The QP s_lock should be held and interrupts disabled. + */ +static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) +{ + struct ipath_ibdev *dev; + + qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, + ib_mtu_enum_to_int(qp->path_mtu)); + dev = to_idev(qp->ibqp.device); + spin_lock(&dev->pending_lock); + if (list_empty(&qp->timerwait)) + list_add_tail(&qp->timerwait, + &dev->pending[dev->pending_index]); + spin_unlock(&dev->pending_lock); +} + +/** + * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read) + * @qp: a pointer to the QP + * @ohdr: a pointer to the IB header being constructed + * @pmtu: the path MTU + * + * Return 1 if constructed; otherwise, return 0. + * Note that we are in the responder's side of the QP context. + * Note the QP s_lock must be held. + */ +static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp, + struct ipath_other_headers *ohdr, u32 pmtu) +{ + struct ipath_ack_entry *e; + u32 hwords; + u32 len; + u32 bth0; + u32 bth2; + + /* Don't send an ACK if we aren't supposed to. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto bail; + + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ + hwords = 5; + + switch (qp->s_ack_state) { + case OP(RDMA_READ_RESPONSE_LAST): + case OP(RDMA_READ_RESPONSE_ONLY): + case OP(ATOMIC_ACKNOWLEDGE): + /* + * We can increment the tail pointer now that the last + * response has been sent instead of only being + * constructed. + */ + if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) + qp->s_tail_ack_queue = 0; + /* FALLTHROUGH */ + case OP(SEND_ONLY): + case OP(ACKNOWLEDGE): + /* Check for no next entry in the queue. */ + if (qp->r_head_ack_queue == qp->s_tail_ack_queue) { + if (qp->s_flags & IPATH_S_ACK_PENDING) + goto normal; + qp->s_ack_state = OP(ACKNOWLEDGE); + goto bail; + } + + e = &qp->s_ack_queue[qp->s_tail_ack_queue]; + if (e->opcode == OP(RDMA_READ_REQUEST)) { + /* Copy SGE state in case we need to resend */ + qp->s_ack_rdma_sge = e->rdma_sge; + qp->s_cur_sge = &qp->s_ack_rdma_sge; + len = e->rdma_sge.sge.sge_length; + if (len > pmtu) { + len = pmtu; + qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); + } else { + qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); + e->sent = 1; + } + ohdr->u.aeth = ipath_compute_aeth(qp); + hwords++; + qp->s_ack_rdma_psn = e->psn; + bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; + } else { + /* COMPARE_SWAP or FETCH_ADD */ + qp->s_cur_sge = NULL; + len = 0; + qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); + ohdr->u.at.aeth = ipath_compute_aeth(qp); + ohdr->u.at.atomic_ack_eth[0] = + cpu_to_be32(e->atomic_data >> 32); + ohdr->u.at.atomic_ack_eth[1] = + cpu_to_be32(e->atomic_data); + hwords += sizeof(ohdr->u.at) / sizeof(u32); + bth2 = e->psn; + e->sent = 1; + } + bth0 = qp->s_ack_state << 24; + break; + + case OP(RDMA_READ_RESPONSE_FIRST): + qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); + /* FALLTHROUGH */ + case OP(RDMA_READ_RESPONSE_MIDDLE): + len = qp->s_ack_rdma_sge.sge.sge_length; + if (len > pmtu) + len = pmtu; + else { + ohdr->u.aeth = ipath_compute_aeth(qp); + hwords++; + qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); + qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1; + } + bth0 = qp->s_ack_state << 24; + bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; + break; + + default: + normal: + /* + * Send a regular ACK. + * Set the s_ack_state so we wait until after sending + * the ACK before setting s_ack_state to ACKNOWLEDGE + * (see above). + */ + qp->s_ack_state = OP(SEND_ONLY); + qp->s_flags &= ~IPATH_S_ACK_PENDING; + qp->s_cur_sge = NULL; + if (qp->s_nak_state) + ohdr->u.aeth = + cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | + (qp->s_nak_state << + IPATH_AETH_CREDIT_SHIFT)); + else + ohdr->u.aeth = ipath_compute_aeth(qp); + hwords++; + len = 0; + bth0 = OP(ACKNOWLEDGE) << 24; + bth2 = qp->s_ack_psn & IPATH_PSN_MASK; + } + qp->s_hdrwords = hwords; + qp->s_cur_size = len; + ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2); + return 1; + +bail: + return 0; +} + +/** + * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) + * @qp: a pointer to the QP + * + * Return 1 if constructed; otherwise, return 0. + */ +int ipath_make_rc_req(struct ipath_qp *qp) +{ + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + struct ipath_other_headers *ohdr; + struct ipath_sge_state *ss; + struct ipath_swqe *wqe; + u32 hwords; + u32 len; + u32 bth0; + u32 bth2; + u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + char newreq; + unsigned long flags; + int ret = 0; + + ohdr = &qp->s_hdr.u.oth; + if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + ohdr = &qp->s_hdr.u.l.oth; + + /* + * The lock is needed to synchronize between the sending tasklet, + * the receive interrupt handler, and timeout resends. + */ + spin_lock_irqsave(&qp->s_lock, flags); + + /* Sending responses has higher priority over sending requests. */ + if ((qp->r_head_ack_queue != qp->s_tail_ack_queue || + (qp->s_flags & IPATH_S_ACK_PENDING) || + qp->s_ack_state != OP(ACKNOWLEDGE)) && + ipath_make_rc_ack(dev, qp, ohdr, pmtu)) + goto done; + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } + + /* Leave BUSY set until RNR timeout. */ + if (qp->s_rnr_timeout) { + qp->s_flags |= IPATH_S_WAITING; + goto bail; + } + + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ + hwords = 5; + bth0 = 1 << 22; /* Set M bit */ + + /* Send a request. */ + wqe = get_swqe_ptr(qp, qp->s_cur); + switch (qp->s_state) { + default: + if (!(ib_ipath_state_ops[qp->state] & + IPATH_PROCESS_NEXT_SEND_OK)) + goto bail; + /* + * Resend an old request or start a new one. + * + * We keep track of the current SWQE so that + * we don't reset the "furthest progress" state + * if we need to back up. + */ + newreq = 0; + if (qp->s_cur == qp->s_tail) { + /* Check if send work queue is empty. */ + if (qp->s_tail == qp->s_head) + goto bail; + /* + * If a fence is requested, wait for previous + * RDMA read and atomic operations to finish. + */ + if ((wqe->wr.send_flags & IB_SEND_FENCE) && + qp->s_num_rd_atomic) { + qp->s_flags |= IPATH_S_FENCE_PENDING; + goto bail; + } + wqe->psn = qp->s_next_psn; + newreq = 1; + } + /* + * Note that we have to be careful not to modify the + * original work request since we may need to resend + * it. + */ + len = wqe->length; + ss = &qp->s_sge; + bth2 = 0; + switch (wqe->wr.opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + /* If no credit, return. */ + if (qp->s_lsn != (u32) -1 && + ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; + goto bail; + } + wqe->lpsn = wqe->psn; + if (len > pmtu) { + wqe->lpsn += (len - 1) / pmtu; + qp->s_state = OP(SEND_FIRST); + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_SEND) + qp->s_state = OP(SEND_ONLY); + else { + qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + } + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= 1 << 23; + bth2 = 1 << 31; /* Request ACK. */ + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + break; + + case IB_WR_RDMA_WRITE: + if (newreq && qp->s_lsn != (u32) -1) + qp->s_lsn++; + /* FALLTHROUGH */ + case IB_WR_RDMA_WRITE_WITH_IMM: + /* If no credit, return. */ + if (qp->s_lsn != (u32) -1 && + ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; + goto bail; + } + ohdr->u.rc.reth.vaddr = + cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + ohdr->u.rc.reth.rkey = + cpu_to_be32(wqe->wr.wr.rdma.rkey); + ohdr->u.rc.reth.length = cpu_to_be32(len); + hwords += sizeof(struct ib_reth) / sizeof(u32); + wqe->lpsn = wqe->psn; + if (len > pmtu) { + wqe->lpsn += (len - 1) / pmtu; + qp->s_state = OP(RDMA_WRITE_FIRST); + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + qp->s_state = OP(RDMA_WRITE_ONLY); + else { + qp->s_state = + OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); + /* Immediate data comes after RETH */ + ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= 1 << 23; + } + bth2 = 1 << 31; /* Request ACK. */ + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + break; + + case IB_WR_RDMA_READ: + /* + * Don't allow more operations to be started + * than the QP limits allow. + */ + if (newreq) { + if (qp->s_num_rd_atomic >= + qp->s_max_rd_atomic) { + qp->s_flags |= IPATH_S_RDMAR_PENDING; + goto bail; + } + qp->s_num_rd_atomic++; + if (qp->s_lsn != (u32) -1) + qp->s_lsn++; + /* + * Adjust s_next_psn to count the + * expected number of responses. + */ + if (len > pmtu) + qp->s_next_psn += (len - 1) / pmtu; + wqe->lpsn = qp->s_next_psn++; + } + ohdr->u.rc.reth.vaddr = + cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + ohdr->u.rc.reth.rkey = + cpu_to_be32(wqe->wr.wr.rdma.rkey); + ohdr->u.rc.reth.length = cpu_to_be32(len); + qp->s_state = OP(RDMA_READ_REQUEST); + hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); + ss = NULL; + len = 0; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + break; + + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + /* + * Don't allow more operations to be started + * than the QP limits allow. + */ + if (newreq) { + if (qp->s_num_rd_atomic >= + qp->s_max_rd_atomic) { + qp->s_flags |= IPATH_S_RDMAR_PENDING; + goto bail; + } + qp->s_num_rd_atomic++; + if (qp->s_lsn != (u32) -1) + qp->s_lsn++; + wqe->lpsn = wqe->psn; + } + if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + qp->s_state = OP(COMPARE_SWAP); + ohdr->u.atomic_eth.swap_data = cpu_to_be64( + wqe->wr.wr.atomic.swap); + ohdr->u.atomic_eth.compare_data = cpu_to_be64( + wqe->wr.wr.atomic.compare_add); + } else { + qp->s_state = OP(FETCH_ADD); + ohdr->u.atomic_eth.swap_data = cpu_to_be64( + wqe->wr.wr.atomic.compare_add); + ohdr->u.atomic_eth.compare_data = 0; + } + ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( + wqe->wr.wr.atomic.remote_addr >> 32); + ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( + wqe->wr.wr.atomic.remote_addr); + ohdr->u.atomic_eth.rkey = cpu_to_be32( + wqe->wr.wr.atomic.rkey); + hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); + ss = NULL; + len = 0; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + break; + + default: + goto bail; + } + qp->s_sge.sge = wqe->sg_list[0]; + qp->s_sge.sg_list = wqe->sg_list + 1; + qp->s_sge.num_sge = wqe->wr.num_sge; + qp->s_len = wqe->length; + if (newreq) { + qp->s_tail++; + if (qp->s_tail >= qp->s_size) + qp->s_tail = 0; + } + bth2 |= qp->s_psn & IPATH_PSN_MASK; + if (wqe->wr.opcode == IB_WR_RDMA_READ) + qp->s_psn = wqe->lpsn + 1; + else { + qp->s_psn++; + if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) + qp->s_next_psn = qp->s_psn; + } + /* + * Put the QP on the pending list so lost ACKs will cause + * a retry. More than one request can be pending so the + * QP may already be on the dev->pending list. + */ + spin_lock(&dev->pending_lock); + if (list_empty(&qp->timerwait)) + list_add_tail(&qp->timerwait, + &dev->pending[dev->pending_index]); + spin_unlock(&dev->pending_lock); + break; + + case OP(RDMA_READ_RESPONSE_FIRST): + /* + * This case can only happen if a send is restarted. + * See ipath_restart_rc(). + */ + ipath_init_restart(qp, wqe); + /* FALLTHROUGH */ + case OP(SEND_FIRST): + qp->s_state = OP(SEND_MIDDLE); + /* FALLTHROUGH */ + case OP(SEND_MIDDLE): + bth2 = qp->s_psn++ & IPATH_PSN_MASK; + if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) + qp->s_next_psn = qp->s_psn; + ss = &qp->s_sge; + len = qp->s_len; + if (len > pmtu) { + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_SEND) + qp->s_state = OP(SEND_LAST); + else { + qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + } + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= 1 << 23; + bth2 |= 1 << 31; /* Request ACK. */ + qp->s_cur++; + if (qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + + case OP(RDMA_READ_RESPONSE_LAST): + /* + * This case can only happen if a RDMA write is restarted. + * See ipath_restart_rc(). + */ + ipath_init_restart(qp, wqe); + /* FALLTHROUGH */ + case OP(RDMA_WRITE_FIRST): + qp->s_state = OP(RDMA_WRITE_MIDDLE); + /* FALLTHROUGH */ + case OP(RDMA_WRITE_MIDDLE): + bth2 = qp->s_psn++ & IPATH_PSN_MASK; + if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) + qp->s_next_psn = qp->s_psn; + ss = &qp->s_sge; + len = qp->s_len; + if (len > pmtu) { + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + qp->s_state = OP(RDMA_WRITE_LAST); + else { + qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= 1 << 23; + } + bth2 |= 1 << 31; /* Request ACK. */ + qp->s_cur++; + if (qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + + case OP(RDMA_READ_RESPONSE_MIDDLE): + /* + * This case can only happen if a RDMA read is restarted. + * See ipath_restart_rc(). + */ + ipath_init_restart(qp, wqe); + len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu; + ohdr->u.rc.reth.vaddr = + cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); + ohdr->u.rc.reth.rkey = + cpu_to_be32(wqe->wr.wr.rdma.rkey); + ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); + qp->s_state = OP(RDMA_READ_REQUEST); + hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); + bth2 = qp->s_psn & IPATH_PSN_MASK; + qp->s_psn = wqe->lpsn + 1; + ss = NULL; + len = 0; + qp->s_cur++; + if (qp->s_cur == qp->s_size) + qp->s_cur = 0; + break; + } + if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) + bth2 |= 1 << 31; /* Request ACK. */ + qp->s_len -= len; + qp->s_hdrwords = hwords; + qp->s_cur_sge = ss; + qp->s_cur_size = len; + ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2); +done: + ret = 1; + goto unlock; + +bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); + return ret; +} + +/** + * send_rc_ack - Construct an ACK packet and send it + * @qp: a pointer to the QP + * + * This is called from ipath_rc_rcv() and only uses the receive + * side QP state. + * Note that RDMA reads and atomics are handled in the + * send side QP state and tasklet. + */ +static void send_rc_ack(struct ipath_qp *qp) +{ + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + struct ipath_devdata *dd; + u16 lrh0; + u32 bth0; + u32 hwords; + u32 __iomem *piobuf; + struct ipath_ib_header hdr; + struct ipath_other_headers *ohdr; + unsigned long flags; + + spin_lock_irqsave(&qp->s_lock, flags); + + /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ + if (qp->r_head_ack_queue != qp->s_tail_ack_queue || + (qp->s_flags & IPATH_S_ACK_PENDING) || + qp->s_ack_state != OP(ACKNOWLEDGE)) + goto queue_ack; + + spin_unlock_irqrestore(&qp->s_lock, flags); + + /* Don't try to send ACKs if the link isn't ACTIVE */ + dd = dev->dd; + if (!(dd->ipath_flags & IPATH_LINKACTIVE)) + goto done; + + piobuf = ipath_getpiobuf(dd, 0, NULL); + if (!piobuf) { + /* + * We are out of PIO buffers at the moment. + * Pass responsibility for sending the ACK to the + * send tasklet so that when a PIO buffer becomes + * available, the ACK is sent ahead of other outgoing + * packets. + */ + spin_lock_irqsave(&qp->s_lock, flags); + goto queue_ack; + } + + /* Construct the header. */ + ohdr = &hdr.u.oth; + lrh0 = IPATH_LRH_BTH; + /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */ + hwords = 6; + if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { + hwords += ipath_make_grh(dev, &hdr.u.l.grh, + &qp->remote_ah_attr.grh, + hwords, 0); + ohdr = &hdr.u.l.oth; + lrh0 = IPATH_LRH_GRH; + } + /* read pkey_index w/o lock (its atomic) */ + bth0 = ipath_get_pkey(dd, qp->s_pkey_index) | + (OP(ACKNOWLEDGE) << 24) | (1 << 22); + if (qp->r_nak_state) + ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | + (qp->r_nak_state << + IPATH_AETH_CREDIT_SHIFT)); + else + ohdr->u.aeth = ipath_compute_aeth(qp); + lrh0 |= qp->remote_ah_attr.sl << 4; + hdr.lrh[0] = cpu_to_be16(lrh0); + hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); + hdr.lrh[3] = cpu_to_be16(dd->ipath_lid | + qp->remote_ah_attr.src_path_bits); + ohdr->bth[0] = cpu_to_be32(bth0); + ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); + ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK); + + writeq(hwords + 1, piobuf); + + if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) { + u32 *hdrp = (u32 *) &hdr; + + ipath_flush_wc(); + __iowrite32_copy(piobuf + 2, hdrp, hwords - 1); + ipath_flush_wc(); + __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1); + } else + __iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords); + + ipath_flush_wc(); + + dev->n_unicast_xmit++; + goto done; + +queue_ack: + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) { + dev->n_rc_qacks++; + qp->s_flags |= IPATH_S_ACK_PENDING; + qp->s_nak_state = qp->r_nak_state; + qp->s_ack_psn = qp->r_ack_psn; + + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); + } + spin_unlock_irqrestore(&qp->s_lock, flags); +done: + return; +} + +/** + * reset_psn - reset the QP state to send starting from PSN + * @qp: the QP + * @psn: the packet sequence number to restart at + * + * This is called from ipath_rc_rcv() to process an incoming RC ACK + * for the given QP. + * Called at interrupt level with the QP s_lock held. + */ +static void reset_psn(struct ipath_qp *qp, u32 psn) +{ + u32 n = qp->s_last; + struct ipath_swqe *wqe = get_swqe_ptr(qp, n); + u32 opcode; + + qp->s_cur = n; + + /* + * If we are starting the request from the beginning, + * let the normal send code handle initialization. + */ + if (ipath_cmp24(psn, wqe->psn) <= 0) { + qp->s_state = OP(SEND_LAST); + goto done; + } + + /* Find the work request opcode corresponding to the given PSN. */ + opcode = wqe->wr.opcode; + for (;;) { + int diff; + + if (++n == qp->s_size) + n = 0; + if (n == qp->s_tail) + break; + wqe = get_swqe_ptr(qp, n); + diff = ipath_cmp24(psn, wqe->psn); + if (diff < 0) + break; + qp->s_cur = n; + /* + * If we are starting the request from the beginning, + * let the normal send code handle initialization. + */ + if (diff == 0) { + qp->s_state = OP(SEND_LAST); + goto done; + } + opcode = wqe->wr.opcode; + } + + /* + * Set the state to restart in the middle of a request. + * Don't change the s_sge, s_cur_sge, or s_cur_size. + * See ipath_make_rc_req(). + */ + switch (opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + qp->s_state = OP(RDMA_READ_RESPONSE_FIRST); + break; + + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + qp->s_state = OP(RDMA_READ_RESPONSE_LAST); + break; + + case IB_WR_RDMA_READ: + qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE); + break; + + default: + /* + * This case shouldn't happen since its only + * one PSN per req. + */ + qp->s_state = OP(SEND_LAST); + } +done: + qp->s_psn = psn; +} + +/** + * ipath_restart_rc - back up requester to resend the last un-ACKed request + * @qp: the QP to restart + * @psn: packet sequence number for the request + * @wc: the work completion request + * + * The QP s_lock should be held and interrupts disabled. + */ +void ipath_restart_rc(struct ipath_qp *qp, u32 psn) +{ + struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); + struct ipath_ibdev *dev; + + if (qp->s_retry == 0) { + ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); + goto bail; + } + qp->s_retry--; + + /* + * Remove the QP from the timeout queue. + * Note: it may already have been removed by ipath_ib_timer(). + */ + dev = to_idev(qp->ibqp.device); + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->timerwait)) + list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); + spin_unlock(&dev->pending_lock); + + if (wqe->wr.opcode == IB_WR_RDMA_READ) + dev->n_rc_resends++; + else + dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; + + reset_psn(qp, psn); + ipath_schedule_send(qp); + +bail: + return; +} + +static inline void update_last_psn(struct ipath_qp *qp, u32 psn) +{ + qp->s_last_psn = psn; +} + +/** + * do_rc_ack - process an incoming RC ACK + * @qp: the QP the ACK came in on + * @psn: the packet sequence number of the ACK + * @opcode: the opcode of the request that resulted in the ACK + * + * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK + * for the given QP. + * Called at interrupt level with the QP s_lock held and interrupts disabled. + * Returns 1 if OK, 0 if current operation should be aborted (NAK). + */ +static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, + u64 val) +{ + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + struct ib_wc wc; + enum ib_wc_status status; + struct ipath_swqe *wqe; + int ret = 0; + u32 ack_psn; + int diff; + + /* + * Remove the QP from the timeout queue (or RNR timeout queue). + * If ipath_ib_timer() has already removed it, + * it's OK since we hold the QP s_lock and ipath_restart_rc() + * just won't find anything to restart if we ACK everything. + */ + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->timerwait)) + list_del_init(&qp->timerwait); + spin_unlock(&dev->pending_lock); + + /* + * Note that NAKs implicitly ACK outstanding SEND and RDMA write + * requests and implicitly NAK RDMA read and atomic requests issued + * before the NAK'ed request. The MSN won't include the NAK'ed + * request but will include an ACK'ed request(s). + */ + ack_psn = psn; + if (aeth >> 29) + ack_psn--; + wqe = get_swqe_ptr(qp, qp->s_last); + + /* + * The MSN might be for a later WQE than the PSN indicates so + * only complete WQEs that the PSN finishes. + */ + while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) { + /* + * RDMA_READ_RESPONSE_ONLY is a special case since + * we want to generate completion events for everything + * before the RDMA read, copy the data, then generate + * the completion for the read. + */ + if (wqe->wr.opcode == IB_WR_RDMA_READ && + opcode == OP(RDMA_READ_RESPONSE_ONLY) && + diff == 0) { + ret = 1; + goto bail; + } + /* + * If this request is a RDMA read or atomic, and the ACK is + * for a later operation, this ACK NAKs the RDMA read or + * atomic. In other words, only a RDMA_READ_LAST or ONLY + * can ACK a RDMA read and likewise for atomic ops. Note + * that the NAK case can only happen if relaxed ordering is + * used and requests are sent after an RDMA read or atomic + * is sent but before the response is received. + */ + if ((wqe->wr.opcode == IB_WR_RDMA_READ && + (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) || + ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && + (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) { + /* + * The last valid PSN seen is the previous + * request's. + */ + update_last_psn(qp, wqe->psn - 1); + /* Retry this request. */ + ipath_restart_rc(qp, wqe->psn); + /* + * No need to process the ACK/NAK since we are + * restarting an earlier request. + */ + goto bail; + } + if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + *(u64 *) wqe->sg_list[0].vaddr = val; + if (qp->s_num_rd_atomic && + (wqe->wr.opcode == IB_WR_RDMA_READ || + wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { + qp->s_num_rd_atomic--; + /* Restart sending task if fence is complete */ + if (((qp->s_flags & IPATH_S_FENCE_PENDING) && + !qp->s_num_rd_atomic) || + qp->s_flags & IPATH_S_RDMAR_PENDING) + ipath_schedule_send(qp); + } + /* Post a send completion queue entry if requested. */ + if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED)) { + memset(&wc, 0, sizeof wc); + wc.wr_id = wqe->wr.wr_id; + wc.status = IB_WC_SUCCESS; + wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; + wc.byte_len = wqe->length; + wc.qp = &qp->ibqp; + wc.src_qp = qp->remote_qpn; + wc.slid = qp->remote_ah_attr.dlid; + wc.sl = qp->remote_ah_attr.sl; + ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); + } + qp->s_retry = qp->s_retry_cnt; + /* + * If we are completing a request which is in the process of + * being resent, we can stop resending it since we know the + * responder has already seen it. + */ + if (qp->s_last == qp->s_cur) { + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; + qp->s_last = qp->s_cur; + if (qp->s_last == qp->s_tail) + break; + wqe = get_swqe_ptr(qp, qp->s_cur); + qp->s_state = OP(SEND_LAST); + qp->s_psn = wqe->psn; + } else { + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur) + qp->s_draining = 0; + if (qp->s_last == qp->s_tail) + break; + wqe = get_swqe_ptr(qp, qp->s_last); + } + } + + switch (aeth >> 29) { + case 0: /* ACK */ + dev->n_rc_acks++; + /* If this is a partial ACK, reset the retransmit timer. */ + if (qp->s_last != qp->s_tail) { + spin_lock(&dev->pending_lock); + if (list_empty(&qp->timerwait)) + list_add_tail(&qp->timerwait, + &dev->pending[dev->pending_index]); + spin_unlock(&dev->pending_lock); + /* + * If we get a partial ACK for a resent operation, + * we can stop resending the earlier packets and + * continue with the next packet the receiver wants. + */ + if (ipath_cmp24(qp->s_psn, psn) <= 0) { + reset_psn(qp, psn + 1); + ipath_schedule_send(qp); + } + } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { + qp->s_state = OP(SEND_LAST); + qp->s_psn = psn + 1; + } + ipath_get_credit(qp, aeth); + qp->s_rnr_retry = qp->s_rnr_retry_cnt; + qp->s_retry = qp->s_retry_cnt; + update_last_psn(qp, psn); + ret = 1; + goto bail; + + case 1: /* RNR NAK */ + dev->n_rnr_naks++; + if (qp->s_last == qp->s_tail) + goto bail; + if (qp->s_rnr_retry == 0) { + status = IB_WC_RNR_RETRY_EXC_ERR; + goto class_b; + } + if (qp->s_rnr_retry_cnt < 7) + qp->s_rnr_retry--; + + /* The last valid PSN is the previous PSN. */ + update_last_psn(qp, psn - 1); + + if (wqe->wr.opcode == IB_WR_RDMA_READ) + dev->n_rc_resends++; + else + dev->n_rc_resends += + (qp->s_psn - psn) & IPATH_PSN_MASK; + + reset_psn(qp, psn); + + qp->s_rnr_timeout = + ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) & + IPATH_AETH_CREDIT_MASK]; + ipath_insert_rnr_queue(qp); + ipath_schedule_send(qp); + goto bail; + + case 3: /* NAK */ + if (qp->s_last == qp->s_tail) + goto bail; + /* The last valid PSN is the previous PSN. */ + update_last_psn(qp, psn - 1); + switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) & + IPATH_AETH_CREDIT_MASK) { + case 0: /* PSN sequence error */ + dev->n_seq_naks++; + /* + * Back up to the responder's expected PSN. + * Note that we might get a NAK in the middle of an + * RDMA READ response which terminates the RDMA + * READ. + */ + ipath_restart_rc(qp, psn); + break; + + case 1: /* Invalid Request */ + status = IB_WC_REM_INV_REQ_ERR; + dev->n_other_naks++; + goto class_b; + + case 2: /* Remote Access Error */ + status = IB_WC_REM_ACCESS_ERR; + dev->n_other_naks++; + goto class_b; + + case 3: /* Remote Operation Error */ + status = IB_WC_REM_OP_ERR; + dev->n_other_naks++; + class_b: + ipath_send_complete(qp, wqe, status); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); + break; + + default: + /* Ignore other reserved NAK error codes */ + goto reserved; + } + qp->s_rnr_retry = qp->s_rnr_retry_cnt; + goto bail; + + default: /* 2: reserved */ + reserved: + /* Ignore reserved NAK codes. */ + goto bail; + } + +bail: + return ret; +} + +/** + * ipath_rc_rcv_resp - process an incoming RC response packet + * @dev: the device this packet came in on + * @ohdr: the other headers for this packet + * @data: the packet data + * @tlen: the packet length + * @qp: the QP for this packet + * @opcode: the opcode for this packet + * @psn: the packet sequence number for this packet + * @hdrsize: the header length + * @pmtu: the path MTU + * @header_in_data: true if part of the header data is in the data buffer + * + * This is called from ipath_rc_rcv() to process an incoming RC response + * packet for the given QP. + * Called at interrupt level. + */ +static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, + struct ipath_other_headers *ohdr, + void *data, u32 tlen, + struct ipath_qp *qp, + u32 opcode, + u32 psn, u32 hdrsize, u32 pmtu, + int header_in_data) +{ + struct ipath_swqe *wqe; + enum ib_wc_status status; + unsigned long flags; + int diff; + u32 pad; + u32 aeth; + u64 val; + + spin_lock_irqsave(&qp->s_lock, flags); + + /* Double check we can process this now that we hold the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto ack_done; + + /* Ignore invalid responses. */ + if (ipath_cmp24(psn, qp->s_next_psn) >= 0) + goto ack_done; + + /* Ignore duplicate responses. */ + diff = ipath_cmp24(psn, qp->s_last_psn); + if (unlikely(diff <= 0)) { + /* Update credits for "ghost" ACKs */ + if (diff == 0 && opcode == OP(ACKNOWLEDGE)) { + if (!header_in_data) + aeth = be32_to_cpu(ohdr->u.aeth); + else { + aeth = be32_to_cpu(((__be32 *) data)[0]); + data += sizeof(__be32); + } + if ((aeth >> 29) == 0) + ipath_get_credit(qp, aeth); + } + goto ack_done; + } + + if (unlikely(qp->s_last == qp->s_tail)) + goto ack_done; + wqe = get_swqe_ptr(qp, qp->s_last); + status = IB_WC_SUCCESS; + + switch (opcode) { + case OP(ACKNOWLEDGE): + case OP(ATOMIC_ACKNOWLEDGE): + case OP(RDMA_READ_RESPONSE_FIRST): + if (!header_in_data) + aeth = be32_to_cpu(ohdr->u.aeth); + else { + aeth = be32_to_cpu(((__be32 *) data)[0]); + data += sizeof(__be32); + } + if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { + if (!header_in_data) { + __be32 *p = ohdr->u.at.atomic_ack_eth; + + val = ((u64) be32_to_cpu(p[0]) << 32) | + be32_to_cpu(p[1]); + } else + val = be64_to_cpu(((__be64 *) data)[0]); + } else + val = 0; + if (!do_rc_ack(qp, aeth, psn, opcode, val) || + opcode != OP(RDMA_READ_RESPONSE_FIRST)) + goto ack_done; + hdrsize += 4; + wqe = get_swqe_ptr(qp, qp->s_last); + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + qp->r_flags &= ~IPATH_R_RDMAR_SEQ; + /* + * If this is a response to a resent RDMA read, we + * have to be careful to copy the data to the right + * location. + */ + qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, + wqe, psn, pmtu); + goto read_middle; + + case OP(RDMA_READ_RESPONSE_MIDDLE): + /* no AETH, no ACK */ + if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { + dev->n_rdma_seq++; + if (qp->r_flags & IPATH_R_RDMAR_SEQ) + goto ack_done; + qp->r_flags |= IPATH_R_RDMAR_SEQ; + ipath_restart_rc(qp, qp->s_last_psn + 1); + goto ack_done; + } + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + read_middle: + if (unlikely(tlen != (hdrsize + pmtu + 4))) + goto ack_len_err; + if (unlikely(pmtu >= qp->s_rdma_read_len)) + goto ack_len_err; + + /* We got a response so update the timeout. */ + spin_lock(&dev->pending_lock); + if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) + list_move_tail(&qp->timerwait, + &dev->pending[dev->pending_index]); + spin_unlock(&dev->pending_lock); + + if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE)) + qp->s_retry = qp->s_retry_cnt; + + /* + * Update the RDMA receive state but do the copy w/o + * holding the locks and blocking interrupts. + */ + qp->s_rdma_read_len -= pmtu; + update_last_psn(qp, psn); + spin_unlock_irqrestore(&qp->s_lock, flags); + ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu); + goto bail; + + case OP(RDMA_READ_RESPONSE_ONLY): + if (!header_in_data) + aeth = be32_to_cpu(ohdr->u.aeth); + else + aeth = be32_to_cpu(((__be32 *) data)[0]); + if (!do_rc_ack(qp, aeth, psn, opcode, 0)) + goto ack_done; + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* + * Check that the data size is >= 0 && <= pmtu. + * Remember to account for the AETH header (4) and + * ICRC (4). + */ + if (unlikely(tlen < (hdrsize + pad + 8))) + goto ack_len_err; + /* + * If this is a response to a resent RDMA read, we + * have to be careful to copy the data to the right + * location. + */ + wqe = get_swqe_ptr(qp, qp->s_last); + qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, + wqe, psn, pmtu); + goto read_last; + + case OP(RDMA_READ_RESPONSE_LAST): + /* ACKs READ req. */ + if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { + dev->n_rdma_seq++; + if (qp->r_flags & IPATH_R_RDMAR_SEQ) + goto ack_done; + qp->r_flags |= IPATH_R_RDMAR_SEQ; + ipath_restart_rc(qp, qp->s_last_psn + 1); + goto ack_done; + } + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* + * Check that the data size is >= 1 && <= pmtu. + * Remember to account for the AETH header (4) and + * ICRC (4). + */ + if (unlikely(tlen <= (hdrsize + pad + 8))) + goto ack_len_err; + read_last: + tlen -= hdrsize + pad + 8; + if (unlikely(tlen != qp->s_rdma_read_len)) + goto ack_len_err; + if (!header_in_data) + aeth = be32_to_cpu(ohdr->u.aeth); + else { + aeth = be32_to_cpu(((__be32 *) data)[0]); + data += sizeof(__be32); + } + ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen); + (void) do_rc_ack(qp, aeth, psn, + OP(RDMA_READ_RESPONSE_LAST), 0); + goto ack_done; + } + +ack_op_err: + status = IB_WC_LOC_QP_OP_ERR; + goto ack_err; + +ack_len_err: + status = IB_WC_LOC_LEN_ERR; +ack_err: + ipath_send_complete(qp, wqe, status); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); +ack_done: + spin_unlock_irqrestore(&qp->s_lock, flags); +bail: + return; +} + +/** + * ipath_rc_rcv_error - process an incoming duplicate or error RC packet + * @dev: the device this packet came in on + * @ohdr: the other headers for this packet + * @data: the packet data + * @qp: the QP for this packet + * @opcode: the opcode for this packet + * @psn: the packet sequence number for this packet + * @diff: the difference between the PSN and the expected PSN + * @header_in_data: true if part of the header data is in the data buffer + * + * This is called from ipath_rc_rcv() to process an unexpected + * incoming RC packet for the given QP. + * Called at interrupt level. + * Return 1 if no more processing is needed; otherwise return 0 to + * schedule a response to be sent. + */ +static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, + struct ipath_other_headers *ohdr, + void *data, + struct ipath_qp *qp, + u32 opcode, + u32 psn, + int diff, + int header_in_data) +{ + struct ipath_ack_entry *e; + u8 i, prev; + int old_req; + unsigned long flags; + + if (diff > 0) { + /* + * Packet sequence error. + * A NAK will ACK earlier sends and RDMA writes. + * Don't queue the NAK if we already sent one. + */ + if (!qp->r_nak_state) { + qp->r_nak_state = IB_NAK_PSN_ERROR; + /* Use the expected PSN. */ + qp->r_ack_psn = qp->r_psn; + goto send_ack; + } + goto done; + } + + /* + * Handle a duplicate request. Don't re-execute SEND, RDMA + * write or atomic op. Don't NAK errors, just silently drop + * the duplicate request. Note that r_sge, r_len, and + * r_rcv_len may be in use so don't modify them. + * + * We are supposed to ACK the earliest duplicate PSN but we + * can coalesce an outstanding duplicate ACK. We have to + * send the earliest so that RDMA reads can be restarted at + * the requester's expected PSN. + * + * First, find where this duplicate PSN falls within the + * ACKs previously sent. + */ + psn &= IPATH_PSN_MASK; + e = NULL; + old_req = 1; + + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this now that we hold the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock_done; + + for (i = qp->r_head_ack_queue; ; i = prev) { + if (i == qp->s_tail_ack_queue) + old_req = 0; + if (i) + prev = i - 1; + else + prev = IPATH_MAX_RDMA_ATOMIC; + if (prev == qp->r_head_ack_queue) { + e = NULL; + break; + } + e = &qp->s_ack_queue[prev]; + if (!e->opcode) { + e = NULL; + break; + } + if (ipath_cmp24(psn, e->psn) >= 0) { + if (prev == qp->s_tail_ack_queue) + old_req = 0; + break; + } + } + switch (opcode) { + case OP(RDMA_READ_REQUEST): { + struct ib_reth *reth; + u32 offset; + u32 len; + + /* + * If we didn't find the RDMA read request in the ack queue, + * or the send tasklet is already backed up to send an + * earlier entry, we can ignore this request. + */ + if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req) + goto unlock_done; + /* RETH comes after BTH */ + if (!header_in_data) + reth = &ohdr->u.rc.reth; + else { + reth = (struct ib_reth *)data; + data += sizeof(*reth); + } + /* + * Address range must be a subset of the original + * request and start on pmtu boundaries. + * We reuse the old ack_queue slot since the requester + * should not back up and request an earlier PSN for the + * same request. + */ + offset = ((psn - e->psn) & IPATH_PSN_MASK) * + ib_mtu_enum_to_int(qp->path_mtu); + len = be32_to_cpu(reth->length); + if (unlikely(offset + len > e->rdma_sge.sge.sge_length)) + goto unlock_done; + if (len != 0) { + u32 rkey = be32_to_cpu(reth->rkey); + u64 vaddr = be64_to_cpu(reth->vaddr); + int ok; + + ok = ipath_rkey_ok(qp, &e->rdma_sge, + len, vaddr, rkey, + IB_ACCESS_REMOTE_READ); + if (unlikely(!ok)) + goto unlock_done; + } else { + e->rdma_sge.sg_list = NULL; + e->rdma_sge.num_sge = 0; + e->rdma_sge.sge.mr = NULL; + e->rdma_sge.sge.vaddr = NULL; + e->rdma_sge.sge.length = 0; + e->rdma_sge.sge.sge_length = 0; + } + e->psn = psn; + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_tail_ack_queue = prev; + break; + } + + case OP(COMPARE_SWAP): + case OP(FETCH_ADD): { + /* + * If we didn't find the atomic request in the ack queue + * or the send tasklet is already backed up to send an + * earlier entry, we can ignore this request. + */ + if (!e || e->opcode != (u8) opcode || old_req) + goto unlock_done; + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_tail_ack_queue = prev; + break; + } + + default: + if (old_req) + goto unlock_done; + /* + * Resend the most recent ACK if this request is + * after all the previous RDMA reads and atomics. + */ + if (i == qp->r_head_ack_queue) { + spin_unlock_irqrestore(&qp->s_lock, flags); + qp->r_nak_state = 0; + qp->r_ack_psn = qp->r_psn - 1; + goto send_ack; + } + /* + * Try to send a simple ACK to work around a Mellanox bug + * which doesn't accept a RDMA read response or atomic + * response as an ACK for earlier SENDs or RDMA writes. + */ + if (qp->r_head_ack_queue == qp->s_tail_ack_queue && + !(qp->s_flags & IPATH_S_ACK_PENDING) && + qp->s_ack_state == OP(ACKNOWLEDGE)) { + spin_unlock_irqrestore(&qp->s_lock, flags); + qp->r_nak_state = 0; + qp->r_ack_psn = qp->s_ack_queue[i].psn - 1; + goto send_ack; + } + /* + * Resend the RDMA read or atomic op which + * ACKs this duplicate request. + */ + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_tail_ack_queue = i; + break; + } + qp->r_nak_state = 0; + ipath_schedule_send(qp); + +unlock_done: + spin_unlock_irqrestore(&qp->s_lock, flags); +done: + return 1; + +send_ack: + return 0; +} + +void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) +{ + unsigned long flags; + int lastwqe; + + spin_lock_irqsave(&qp->s_lock, flags); + lastwqe = ipath_error_qp(qp, err); + spin_unlock_irqrestore(&qp->s_lock, flags); + + if (lastwqe) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } +} + +static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) +{ + unsigned next; + + next = n + 1; + if (next > IPATH_MAX_RDMA_ATOMIC) + next = 0; + if (n == qp->s_tail_ack_queue) { + qp->s_tail_ack_queue = next; + qp->s_ack_state = OP(ACKNOWLEDGE); + } +} + +/** + * ipath_rc_rcv - process an incoming RC packet + * @dev: the device this packet came in on + * @hdr: the header of this packet + * @has_grh: true if the header has a GRH + * @data: the packet data + * @tlen: the packet length + * @qp: the QP for this packet + * + * This is called from ipath_qp_rcv() to process an incoming RC packet + * for the given QP. + * Called at interrupt level. + */ +void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct ipath_qp *qp) +{ + struct ipath_other_headers *ohdr; + u32 opcode; + u32 hdrsize; + u32 psn; + u32 pad; + struct ib_wc wc; + u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + int diff; + struct ib_reth *reth; + int header_in_data; + unsigned long flags; + + /* Validate the SLID. See Ch. 9.6.1.5 */ + if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) + goto done; + + /* Check for GRH */ + if (!has_grh) { + ohdr = &hdr->u.oth; + hdrsize = 8 + 12; /* LRH + BTH */ + psn = be32_to_cpu(ohdr->bth[2]); + header_in_data = 0; + } else { + ohdr = &hdr->u.l.oth; + hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ + /* + * The header with GRH is 60 bytes and the core driver sets + * the eager header buffer size to 56 bytes so the last 4 + * bytes of the BTH header (PSN) is in the data buffer. + */ + header_in_data = dev->dd->ipath_rcvhdrentsize == 16; + if (header_in_data) { + psn = be32_to_cpu(((__be32 *) data)[0]); + data += sizeof(__be32); + } else + psn = be32_to_cpu(ohdr->bth[2]); + } + + /* + * Process responses (ACKs) before anything else. Note that the + * packet sequence number will be for something in the send work + * queue rather than the expected receive packet sequence number. + * In other words, this QP is the requester. + */ + opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && + opcode <= OP(ATOMIC_ACKNOWLEDGE)) { + ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn, + hdrsize, pmtu, header_in_data); + goto done; + } + + /* Compute 24 bits worth of difference. */ + diff = ipath_cmp24(psn, qp->r_psn); + if (unlikely(diff)) { + if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode, + psn, diff, header_in_data)) + goto done; + goto send_ack; + } + + /* Check for opcode sequence errors. */ + switch (qp->r_state) { + case OP(SEND_FIRST): + case OP(SEND_MIDDLE): + if (opcode == OP(SEND_MIDDLE) || + opcode == OP(SEND_LAST) || + opcode == OP(SEND_LAST_WITH_IMMEDIATE)) + break; + goto nack_inv; + + case OP(RDMA_WRITE_FIRST): + case OP(RDMA_WRITE_MIDDLE): + if (opcode == OP(RDMA_WRITE_MIDDLE) || + opcode == OP(RDMA_WRITE_LAST) || + opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) + break; + goto nack_inv; + + default: + if (opcode == OP(SEND_MIDDLE) || + opcode == OP(SEND_LAST) || + opcode == OP(SEND_LAST_WITH_IMMEDIATE) || + opcode == OP(RDMA_WRITE_MIDDLE) || + opcode == OP(RDMA_WRITE_LAST) || + opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) + goto nack_inv; + /* + * Note that it is up to the requester to not send a new + * RDMA read or atomic operation before receiving an ACK + * for the previous operation. + */ + break; + } + + memset(&wc, 0, sizeof wc); + + /* OK, process the packet. */ + switch (opcode) { + case OP(SEND_FIRST): + if (!ipath_get_rwqe(qp, 0)) + goto rnr_nak; + qp->r_rcv_len = 0; + /* FALLTHROUGH */ + case OP(SEND_MIDDLE): + case OP(RDMA_WRITE_MIDDLE): + send_middle: + /* Check for invalid length PMTU or posted rwqe len. */ + if (unlikely(tlen != (hdrsize + pmtu + 4))) + goto nack_inv; + qp->r_rcv_len += pmtu; + if (unlikely(qp->r_rcv_len > qp->r_len)) + goto nack_inv; + ipath_copy_sge(&qp->r_sge, data, pmtu); + break; + + case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): + /* consume RWQE */ + if (!ipath_get_rwqe(qp, 1)) + goto rnr_nak; + goto send_last_imm; + + case OP(SEND_ONLY): + case OP(SEND_ONLY_WITH_IMMEDIATE): + if (!ipath_get_rwqe(qp, 0)) + goto rnr_nak; + qp->r_rcv_len = 0; + if (opcode == OP(SEND_ONLY)) + goto send_last; + /* FALLTHROUGH */ + case OP(SEND_LAST_WITH_IMMEDIATE): + send_last_imm: + if (header_in_data) { + wc.ex.imm_data = *(__be32 *) data; + data += sizeof(__be32); + } else { + /* Immediate data comes after BTH */ + wc.ex.imm_data = ohdr->u.imm_data; + } + hdrsize += 4; + wc.wc_flags = IB_WC_WITH_IMM; + /* FALLTHROUGH */ + case OP(SEND_LAST): + case OP(RDMA_WRITE_LAST): + send_last: + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* Check for invalid length. */ + /* XXX LAST len should be >= 1 */ + if (unlikely(tlen < (hdrsize + pad + 4))) + goto nack_inv; + /* Don't count the CRC. */ + tlen -= (hdrsize + pad + 4); + wc.byte_len = tlen + qp->r_rcv_len; + if (unlikely(wc.byte_len > qp->r_len)) + goto nack_inv; + ipath_copy_sge(&qp->r_sge, data, tlen); + qp->r_msn++; + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) + break; + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; + if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || + opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + else + wc.opcode = IB_WC_RECV; + wc.qp = &qp->ibqp; + wc.src_qp = qp->remote_qpn; + wc.slid = qp->remote_ah_attr.dlid; + wc.sl = qp->remote_ah_attr.sl; + /* Signal completion event if the solicited bit is set. */ + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + (ohdr->bth[0] & + cpu_to_be32(1 << 23)) != 0); + break; + + case OP(RDMA_WRITE_FIRST): + case OP(RDMA_WRITE_ONLY): + case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_WRITE))) + goto nack_inv; + /* consume RWQE */ + /* RETH comes after BTH */ + if (!header_in_data) + reth = &ohdr->u.rc.reth; + else { + reth = (struct ib_reth *)data; + data += sizeof(*reth); + } + hdrsize += sizeof(*reth); + qp->r_len = be32_to_cpu(reth->length); + qp->r_rcv_len = 0; + if (qp->r_len != 0) { + u32 rkey = be32_to_cpu(reth->rkey); + u64 vaddr = be64_to_cpu(reth->vaddr); + int ok; + + /* Check rkey & NAK */ + ok = ipath_rkey_ok(qp, &qp->r_sge, + qp->r_len, vaddr, rkey, + IB_ACCESS_REMOTE_WRITE); + if (unlikely(!ok)) + goto nack_acc; + } else { + qp->r_sge.sg_list = NULL; + qp->r_sge.sge.mr = NULL; + qp->r_sge.sge.vaddr = NULL; + qp->r_sge.sge.length = 0; + qp->r_sge.sge.sge_length = 0; + } + if (opcode == OP(RDMA_WRITE_FIRST)) + goto send_middle; + else if (opcode == OP(RDMA_WRITE_ONLY)) + goto send_last; + if (!ipath_get_rwqe(qp, 1)) + goto rnr_nak; + goto send_last_imm; + + case OP(RDMA_READ_REQUEST): { + struct ipath_ack_entry *e; + u32 len; + u8 next; + + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_READ))) + goto nack_inv; + next = qp->r_head_ack_queue + 1; + if (next > IPATH_MAX_RDMA_ATOMIC) + next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this while holding the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock; + if (unlikely(next == qp->s_tail_ack_queue)) { + if (!qp->s_ack_queue[next].sent) + goto nack_inv_unlck; + ipath_update_ack_queue(qp, next); + } + e = &qp->s_ack_queue[qp->r_head_ack_queue]; + /* RETH comes after BTH */ + if (!header_in_data) + reth = &ohdr->u.rc.reth; + else { + reth = (struct ib_reth *)data; + data += sizeof(*reth); + } + len = be32_to_cpu(reth->length); + if (len) { + u32 rkey = be32_to_cpu(reth->rkey); + u64 vaddr = be64_to_cpu(reth->vaddr); + int ok; + + /* Check rkey & NAK */ + ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, + rkey, IB_ACCESS_REMOTE_READ); + if (unlikely(!ok)) + goto nack_acc_unlck; + /* + * Update the next expected PSN. We add 1 later + * below, so only add the remainder here. + */ + if (len > pmtu) + qp->r_psn += (len - 1) / pmtu; + } else { + e->rdma_sge.sg_list = NULL; + e->rdma_sge.num_sge = 0; + e->rdma_sge.sge.mr = NULL; + e->rdma_sge.sge.vaddr = NULL; + e->rdma_sge.sge.length = 0; + e->rdma_sge.sge.sge_length = 0; + } + e->opcode = opcode; + e->sent = 0; + e->psn = psn; + /* + * We need to increment the MSN here instead of when we + * finish sending the result since a duplicate request would + * increment it more than once. + */ + qp->r_msn++; + qp->r_psn++; + qp->r_state = opcode; + qp->r_nak_state = 0; + qp->r_head_ack_queue = next; + + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); + + goto unlock; + } + + case OP(COMPARE_SWAP): + case OP(FETCH_ADD): { + struct ib_atomic_eth *ateth; + struct ipath_ack_entry *e; + u64 vaddr; + atomic64_t *maddr; + u64 sdata; + u32 rkey; + u8 next; + + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_ATOMIC))) + goto nack_inv; + next = qp->r_head_ack_queue + 1; + if (next > IPATH_MAX_RDMA_ATOMIC) + next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this while holding the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock; + if (unlikely(next == qp->s_tail_ack_queue)) { + if (!qp->s_ack_queue[next].sent) + goto nack_inv_unlck; + ipath_update_ack_queue(qp, next); + } + if (!header_in_data) + ateth = &ohdr->u.atomic_eth; + else + ateth = (struct ib_atomic_eth *)data; + vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | + be32_to_cpu(ateth->vaddr[1]); + if (unlikely(vaddr & (sizeof(u64) - 1))) + goto nack_inv_unlck; + rkey = be32_to_cpu(ateth->rkey); + /* Check rkey & NAK */ + if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, + sizeof(u64), vaddr, rkey, + IB_ACCESS_REMOTE_ATOMIC))) + goto nack_acc_unlck; + /* Perform atomic OP and save result. */ + maddr = (atomic64_t *) qp->r_sge.sge.vaddr; + sdata = be64_to_cpu(ateth->swap_data); + e = &qp->s_ack_queue[qp->r_head_ack_queue]; + e->atomic_data = (opcode == OP(FETCH_ADD)) ? + (u64) atomic64_add_return(sdata, maddr) - sdata : + (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + be64_to_cpu(ateth->compare_data), + sdata); + e->opcode = opcode; + e->sent = 0; + e->psn = psn & IPATH_PSN_MASK; + qp->r_msn++; + qp->r_psn++; + qp->r_state = opcode; + qp->r_nak_state = 0; + qp->r_head_ack_queue = next; + + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); + + goto unlock; + } + + default: + /* NAK unknown opcodes. */ + goto nack_inv; + } + qp->r_psn++; + qp->r_state = opcode; + qp->r_ack_psn = psn; + qp->r_nak_state = 0; + /* Send an ACK if requested or required. */ + if (psn & (1 << 31)) + goto send_ack; + goto done; + +rnr_nak: + qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; + qp->r_ack_psn = qp->r_psn; + goto send_ack; + +nack_inv_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); +nack_inv: + ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR); + qp->r_nak_state = IB_NAK_INVALID_REQUEST; + qp->r_ack_psn = qp->r_psn; + goto send_ack; + +nack_acc_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); +nack_acc: + ipath_rc_error(qp, IB_WC_LOC_PROT_ERR); + qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; + qp->r_ack_psn = qp->r_psn; +send_ack: + send_rc_ack(qp); + goto done; + +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); +done: + return; +} diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h new file mode 100644 index 00000000..8f44d0cf --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -0,0 +1,512 @@ +/* + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _IPATH_REGISTERS_H +#define _IPATH_REGISTERS_H + +/* + * This file should only be included by kernel source, and by the diags. It + * defines the registers, and their contents, for InfiniPath chips. + */ + +/* + * These are the InfiniPath register and buffer bit definitions, + * that are visible to software, and needed only by the kernel + * and diag code. A few, that are visible to protocol and user + * code are in ipath_common.h. Some bits are specific + * to a given chip implementation, and have been moved to the + * chip-specific source file + */ + +/* kr_revision bits */ +#define INFINIPATH_R_CHIPREVMINOR_MASK 0xFF +#define INFINIPATH_R_CHIPREVMINOR_SHIFT 0 +#define INFINIPATH_R_CHIPREVMAJOR_MASK 0xFF +#define INFINIPATH_R_CHIPREVMAJOR_SHIFT 8 +#define INFINIPATH_R_ARCH_MASK 0xFF +#define INFINIPATH_R_ARCH_SHIFT 16 +#define INFINIPATH_R_SOFTWARE_MASK 0xFF +#define INFINIPATH_R_SOFTWARE_SHIFT 24 +#define INFINIPATH_R_BOARDID_MASK 0xFF +#define INFINIPATH_R_BOARDID_SHIFT 32 + +/* kr_control bits */ +#define INFINIPATH_C_FREEZEMODE 0x00000002 +#define INFINIPATH_C_LINKENABLE 0x00000004 + +/* kr_sendctrl bits */ +#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16 +#define INFINIPATH_S_UPDTHRESH_SHIFT 24 +#define INFINIPATH_S_UPDTHRESH_MASK 0x1f + +#define IPATH_S_ABORT 0 +#define IPATH_S_PIOINTBUFAVAIL 1 +#define IPATH_S_PIOBUFAVAILUPD 2 +#define IPATH_S_PIOENABLE 3 +#define IPATH_S_SDMAINTENABLE 9 +#define IPATH_S_SDMASINGLEDESCRIPTOR 10 +#define IPATH_S_SDMAENABLE 11 +#define IPATH_S_SDMAHALT 12 +#define IPATH_S_DISARM 31 + +#define INFINIPATH_S_ABORT (1U << IPATH_S_ABORT) +#define INFINIPATH_S_PIOINTBUFAVAIL (1U << IPATH_S_PIOINTBUFAVAIL) +#define INFINIPATH_S_PIOBUFAVAILUPD (1U << IPATH_S_PIOBUFAVAILUPD) +#define INFINIPATH_S_PIOENABLE (1U << IPATH_S_PIOENABLE) +#define INFINIPATH_S_SDMAINTENABLE (1U << IPATH_S_SDMAINTENABLE) +#define INFINIPATH_S_SDMASINGLEDESCRIPTOR \ + (1U << IPATH_S_SDMASINGLEDESCRIPTOR) +#define INFINIPATH_S_SDMAENABLE (1U << IPATH_S_SDMAENABLE) +#define INFINIPATH_S_SDMAHALT (1U << IPATH_S_SDMAHALT) +#define INFINIPATH_S_DISARM (1U << IPATH_S_DISARM) + +/* kr_rcvctrl bits that are the same on multiple chips */ +#define INFINIPATH_R_PORTENABLE_SHIFT 0 +#define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38) + +/* kr_intstatus, kr_intclear, kr_intmask bits */ +#define INFINIPATH_I_SDMAINT 0x8000000000000000ULL +#define INFINIPATH_I_SDMADISABLED 0x4000000000000000ULL +#define INFINIPATH_I_ERROR 0x0000000080000000ULL +#define INFINIPATH_I_SPIOSENT 0x0000000040000000ULL +#define INFINIPATH_I_SPIOBUFAVAIL 0x0000000020000000ULL +#define INFINIPATH_I_GPIO 0x0000000010000000ULL +#define INFINIPATH_I_JINT 0x0000000004000000ULL + +/* kr_errorstatus, kr_errorclear, kr_errormask bits */ +#define INFINIPATH_E_RFORMATERR 0x0000000000000001ULL +#define INFINIPATH_E_RVCRC 0x0000000000000002ULL +#define INFINIPATH_E_RICRC 0x0000000000000004ULL +#define INFINIPATH_E_RMINPKTLEN 0x0000000000000008ULL +#define INFINIPATH_E_RMAXPKTLEN 0x0000000000000010ULL +#define INFINIPATH_E_RLONGPKTLEN 0x0000000000000020ULL +#define INFINIPATH_E_RSHORTPKTLEN 0x0000000000000040ULL +#define INFINIPATH_E_RUNEXPCHAR 0x0000000000000080ULL +#define INFINIPATH_E_RUNSUPVL 0x0000000000000100ULL +#define INFINIPATH_E_REBP 0x0000000000000200ULL +#define INFINIPATH_E_RIBFLOW 0x0000000000000400ULL +#define INFINIPATH_E_RBADVERSION 0x0000000000000800ULL +#define INFINIPATH_E_RRCVEGRFULL 0x0000000000001000ULL +#define INFINIPATH_E_RRCVHDRFULL 0x0000000000002000ULL +#define INFINIPATH_E_RBADTID 0x0000000000004000ULL +#define INFINIPATH_E_RHDRLEN 0x0000000000008000ULL +#define INFINIPATH_E_RHDR 0x0000000000010000ULL +#define INFINIPATH_E_RIBLOSTLINK 0x0000000000020000ULL +#define INFINIPATH_E_SENDSPECIALTRIGGER 0x0000000008000000ULL +#define INFINIPATH_E_SDMADISABLED 0x0000000010000000ULL +#define INFINIPATH_E_SMINPKTLEN 0x0000000020000000ULL +#define INFINIPATH_E_SMAXPKTLEN 0x0000000040000000ULL +#define INFINIPATH_E_SUNDERRUN 0x0000000080000000ULL +#define INFINIPATH_E_SPKTLEN 0x0000000100000000ULL +#define INFINIPATH_E_SDROPPEDSMPPKT 0x0000000200000000ULL +#define INFINIPATH_E_SDROPPEDDATAPKT 0x0000000400000000ULL +#define INFINIPATH_E_SPIOARMLAUNCH 0x0000000800000000ULL +#define INFINIPATH_E_SUNEXPERRPKTNUM 0x0000001000000000ULL +#define INFINIPATH_E_SUNSUPVL 0x0000002000000000ULL +#define INFINIPATH_E_SENDBUFMISUSE 0x0000004000000000ULL +#define INFINIPATH_E_SDMAGENMISMATCH 0x0000008000000000ULL +#define INFINIPATH_E_SDMAOUTOFBOUND 0x0000010000000000ULL +#define INFINIPATH_E_SDMATAILOUTOFBOUND 0x0000020000000000ULL +#define INFINIPATH_E_SDMABASE 0x0000040000000000ULL +#define INFINIPATH_E_SDMA1STDESC 0x0000080000000000ULL +#define INFINIPATH_E_SDMARPYTAG 0x0000100000000000ULL +#define INFINIPATH_E_SDMADWEN 0x0000200000000000ULL +#define INFINIPATH_E_SDMAMISSINGDW 0x0000400000000000ULL +#define INFINIPATH_E_SDMAUNEXPDATA 0x0000800000000000ULL +#define INFINIPATH_E_IBSTATUSCHANGED 0x0001000000000000ULL +#define INFINIPATH_E_INVALIDADDR 0x0002000000000000ULL +#define INFINIPATH_E_RESET 0x0004000000000000ULL +#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL +#define INFINIPATH_E_SDMADESCADDRMISALIGN 0x0010000000000000ULL +#define INFINIPATH_E_INVALIDEEPCMD 0x0020000000000000ULL + +/* + * this is used to print "common" packet errors only when the + * __IPATH_ERRPKTDBG bit is set in ipath_debug. + */ +#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \ + | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \ + | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \ + | INFINIPATH_E_REBP ) + +/* Convenience for decoding Send DMA errors */ +#define INFINIPATH_E_SDMAERRS ( \ + INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND | \ + INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE | \ + INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG | \ + INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW | \ + INFINIPATH_E_SDMAUNEXPDATA | \ + INFINIPATH_E_SDMADESCADDRMISALIGN | \ + INFINIPATH_E_SDMADISABLED | \ + INFINIPATH_E_SENDBUFMISUSE) + +/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ +/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo + * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID + * bit 4: flag buffer, 5: datainfo, 6: header info */ +#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL +#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40 +#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44 +#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL +#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL +/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */ +#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF 0x1ULL +#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC 0x2ULL +#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL +/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */ +#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x04ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL +/* waldo specific -- find the rest in ipath_6110.c */ +#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL +/* 6120/7220 specific -- find the rest in ipath_6120.c and ipath_7220.c */ +#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL + +/* kr_hwdiagctrl bits */ +#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL +#define INFINIPATH_DC_FORCETXEMEMPARITYERR_SHIFT 40 +#define INFINIPATH_DC_FORCERXEMEMPARITYERR_MASK 0x7FULL +#define INFINIPATH_DC_FORCERXEMEMPARITYERR_SHIFT 44 +#define INFINIPATH_DC_FORCERXDSYNCMEMPARITYERR 0x0000000400000000ULL +#define INFINIPATH_DC_COUNTERDISABLE 0x1000000000000000ULL +#define INFINIPATH_DC_COUNTERWREN 0x2000000000000000ULL +#define INFINIPATH_DC_FORCEIBCBUSTOSPCPARITYERR 0x4000000000000000ULL +#define INFINIPATH_DC_FORCEIBCBUSFRSPCPARITYERR 0x8000000000000000ULL + +/* kr_ibcctrl bits */ +#define INFINIPATH_IBCC_FLOWCTRLPERIOD_MASK 0xFFULL +#define INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT 0 +#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_MASK 0xFFULL +#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT 8 +#define INFINIPATH_IBCC_LINKINITCMD_MASK 0x3ULL +#define INFINIPATH_IBCC_LINKINITCMD_DISABLE 1 +/* cycle through TS1/TS2 till OK */ +#define INFINIPATH_IBCC_LINKINITCMD_POLL 2 +/* wait for TS1, then go on */ +#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3 +#define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16 +#define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL +#define INFINIPATH_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */ +#define INFINIPATH_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */ +#define INFINIPATH_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */ +#define INFINIPATH_IBCC_LINKCMD_SHIFT 18 +#define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL +#define INFINIPATH_IBCC_MAXPKTLEN_SHIFT 20 +#define INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK 0xFULL +#define INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT 32 +#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK 0xFULL +#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT 36 +#define INFINIPATH_IBCC_CREDITSCALE_MASK 0x7ULL +#define INFINIPATH_IBCC_CREDITSCALE_SHIFT 40 +#define INFINIPATH_IBCC_LOOPBACK 0x8000000000000000ULL +#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL + +/* kr_ibcstatus bits */ +#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0 +#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7 + +#define INFINIPATH_IBCS_TXREADY 0x40000000 +#define INFINIPATH_IBCS_TXCREDITOK 0x80000000 +/* link training states (shift by + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */ +#define INFINIPATH_IBCS_LT_STATE_DISABLED 0x00 +#define INFINIPATH_IBCS_LT_STATE_LINKUP 0x01 +#define INFINIPATH_IBCS_LT_STATE_POLLACTIVE 0x02 +#define INFINIPATH_IBCS_LT_STATE_POLLQUIET 0x03 +#define INFINIPATH_IBCS_LT_STATE_SLEEPDELAY 0x04 +#define INFINIPATH_IBCS_LT_STATE_SLEEPQUIET 0x05 +#define INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE 0x08 +#define INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG 0x09 +#define INFINIPATH_IBCS_LT_STATE_CFGWAITRMT 0x0a +#define INFINIPATH_IBCS_LT_STATE_CFGIDLE 0x0b +#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN 0x0c +#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT 0x0e +#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE 0x0f +/* link state machine states (shift by ibcs_ls_shift) */ +#define INFINIPATH_IBCS_L_STATE_DOWN 0x0 +#define INFINIPATH_IBCS_L_STATE_INIT 0x1 +#define INFINIPATH_IBCS_L_STATE_ARM 0x2 +#define INFINIPATH_IBCS_L_STATE_ACTIVE 0x3 +#define INFINIPATH_IBCS_L_STATE_ACT_DEFER 0x4 + + +/* kr_extstatus bits */ +#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1 +#define INFINIPATH_EXTS_GPIOIN_MASK 0xFFFFULL +#define INFINIPATH_EXTS_GPIOIN_SHIFT 48 + +/* kr_extctrl bits */ +#define INFINIPATH_EXTC_GPIOINVERT_MASK 0xFFFFULL +#define INFINIPATH_EXTC_GPIOINVERT_SHIFT 32 +#define INFINIPATH_EXTC_GPIOOE_MASK 0xFFFFULL +#define INFINIPATH_EXTC_GPIOOE_SHIFT 48 +#define INFINIPATH_EXTC_SERDESENABLE 0x80000000ULL +#define INFINIPATH_EXTC_SERDESCONNECT 0x40000000ULL +#define INFINIPATH_EXTC_SERDESENTRUNKING 0x20000000ULL +#define INFINIPATH_EXTC_SERDESDISRXFIFO 0x10000000ULL +#define INFINIPATH_EXTC_SERDESENPLPBK1 0x08000000ULL +#define INFINIPATH_EXTC_SERDESENPLPBK2 0x04000000ULL +#define INFINIPATH_EXTC_SERDESENENCDEC 0x02000000ULL +#define INFINIPATH_EXTC_LED1SECPORT_ON 0x00000020ULL +#define INFINIPATH_EXTC_LED2SECPORT_ON 0x00000010ULL +#define INFINIPATH_EXTC_LED1PRIPORT_ON 0x00000008ULL +#define INFINIPATH_EXTC_LED2PRIPORT_ON 0x00000004ULL +#define INFINIPATH_EXTC_LEDGBLOK_ON 0x00000002ULL +#define INFINIPATH_EXTC_LEDGBLERR_OFF 0x00000001ULL + +/* kr_partitionkey bits */ +#define INFINIPATH_PKEY_SIZE 16 +#define INFINIPATH_PKEY_MASK 0xFFFF +#define INFINIPATH_PKEY_DEFAULT_PKEY 0xFFFF + +/* kr_serdesconfig0 bits */ +#define INFINIPATH_SERDC0_RESET_MASK 0xfULL /* overal reset bits */ +#define INFINIPATH_SERDC0_RESET_PLL 0x10000000ULL /* pll reset */ +/* tx idle enables (per lane) */ +#define INFINIPATH_SERDC0_TXIDLE 0xF000ULL +/* rx detect enables (per lane) */ +#define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL +/* L1 Power down; use with RXDETECT, Otherwise not used on IB side */ +#define INFINIPATH_SERDC0_L1PWR_DN 0xF0ULL + +/* common kr_xgxsconfig bits (or safe in all, even if not implemented) */ +#define INFINIPATH_XGXS_RX_POL_SHIFT 19 +#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL + + +/* + * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our + * PIO send buffers. This is well beyond anything currently + * defined in the InfiniBand spec. + */ +#define IPATH_PIO_MAXIBHDR 128 + +typedef u64 ipath_err_t; + +/* The following change with the type of device, so + * need to be part of the ipath_devdata struct, or + * we could have problems plugging in devices of + * different types (e.g. one HT, one PCIE) + * in one system, to be managed by one driver. + * On the other hand, this file is may also be included + * by other code, so leave the declarations here + * temporarily. Minor footprint issue if common-model + * linker used, none if C89+ linker used. + */ + +/* mask of defined bits for various registers */ +extern u64 infinipath_i_bitsextant; +extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant; + +/* masks that are different in various chips, or only exist in some chips */ +extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask; + +/* + * These are the infinipath general register numbers (not offsets). + * The kernel registers are used directly, those beyond the kernel + * registers are calculated from one of the base registers. The use of + * an integer type doesn't allow type-checking as thorough as, say, + * an enum but allows for better hiding of chip differences. + */ +typedef const u16 ipath_kreg, /* infinipath general registers */ + ipath_creg, /* infinipath counter registers */ + ipath_sreg; /* kernel-only, infinipath send registers */ + +/* + * These are the chip registers common to all infinipath chips, and + * used both by the kernel and the diagnostics or other user code. + * They are all implemented such that 64 bit accesses work. + * Some implement no more than 32 bits. Because 64 bit reads + * require 2 HT cmds on opteron, we access those with 32 bit + * reads for efficiency (they are written as 64 bits, since + * the extra 32 bits are nearly free on writes, and it slightly reduces + * complexity). The rest are all accessed as 64 bits. + */ +struct ipath_kregs { + /* These are the 32 bit group */ + ipath_kreg kr_control; + ipath_kreg kr_counterregbase; + ipath_kreg kr_intmask; + ipath_kreg kr_intstatus; + ipath_kreg kr_pagealign; + ipath_kreg kr_portcnt; + ipath_kreg kr_rcvtidbase; + ipath_kreg kr_rcvtidcnt; + ipath_kreg kr_rcvegrbase; + ipath_kreg kr_rcvegrcnt; + ipath_kreg kr_scratch; + ipath_kreg kr_sendctrl; + ipath_kreg kr_sendpiobufbase; + ipath_kreg kr_sendpiobufcnt; + ipath_kreg kr_sendpiosize; + ipath_kreg kr_sendregbase; + ipath_kreg kr_userregbase; + /* These are the 64 bit group */ + ipath_kreg kr_debugport; + ipath_kreg kr_debugportselect; + ipath_kreg kr_errorclear; + ipath_kreg kr_errormask; + ipath_kreg kr_errorstatus; + ipath_kreg kr_extctrl; + ipath_kreg kr_extstatus; + ipath_kreg kr_gpio_clear; + ipath_kreg kr_gpio_mask; + ipath_kreg kr_gpio_out; + ipath_kreg kr_gpio_status; + ipath_kreg kr_hwdiagctrl; + ipath_kreg kr_hwerrclear; + ipath_kreg kr_hwerrmask; + ipath_kreg kr_hwerrstatus; + ipath_kreg kr_ibcctrl; + ipath_kreg kr_ibcstatus; + ipath_kreg kr_intblocked; + ipath_kreg kr_intclear; + ipath_kreg kr_interruptconfig; + ipath_kreg kr_mdio; + ipath_kreg kr_partitionkey; + ipath_kreg kr_rcvbthqp; + ipath_kreg kr_rcvbufbase; + ipath_kreg kr_rcvbufsize; + ipath_kreg kr_rcvctrl; + ipath_kreg kr_rcvhdrcnt; + ipath_kreg kr_rcvhdrentsize; + ipath_kreg kr_rcvhdrsize; + ipath_kreg kr_rcvintmembase; + ipath_kreg kr_rcvintmemsize; + ipath_kreg kr_revision; + ipath_kreg kr_sendbuffererror; + ipath_kreg kr_sendpioavailaddr; + ipath_kreg kr_serdesconfig0; + ipath_kreg kr_serdesconfig1; + ipath_kreg kr_serdesstatus; + ipath_kreg kr_txintmembase; + ipath_kreg kr_txintmemsize; + ipath_kreg kr_xgxsconfig; + ipath_kreg kr_ibpllcfg; + /* use these two (and the following N ports) only with + * ipath_k*_kreg64_port(); not *kreg64() */ + ipath_kreg kr_rcvhdraddr; + ipath_kreg kr_rcvhdrtailaddr; + + /* remaining registers are not present on all types of infinipath + chips */ + ipath_kreg kr_rcvpktledcnt; + ipath_kreg kr_pcierbuftestreg0; + ipath_kreg kr_pcierbuftestreg1; + ipath_kreg kr_pcieq0serdesconfig0; + ipath_kreg kr_pcieq0serdesconfig1; + ipath_kreg kr_pcieq0serdesstatus; + ipath_kreg kr_pcieq1serdesconfig0; + ipath_kreg kr_pcieq1serdesconfig1; + ipath_kreg kr_pcieq1serdesstatus; + ipath_kreg kr_hrtbt_guid; + ipath_kreg kr_ibcddrctrl; + ipath_kreg kr_ibcddrstatus; + ipath_kreg kr_jintreload; + + /* send dma related regs */ + ipath_kreg kr_senddmabase; + ipath_kreg kr_senddmalengen; + ipath_kreg kr_senddmatail; + ipath_kreg kr_senddmahead; + ipath_kreg kr_senddmaheadaddr; + ipath_kreg kr_senddmabufmask0; + ipath_kreg kr_senddmabufmask1; + ipath_kreg kr_senddmabufmask2; + ipath_kreg kr_senddmastatus; + + /* SerDes related regs (IBA7220-only) */ + ipath_kreg kr_ibserdesctrl; + ipath_kreg kr_ib_epbacc; + ipath_kreg kr_ib_epbtrans; + ipath_kreg kr_pcie_epbacc; + ipath_kreg kr_pcie_epbtrans; + ipath_kreg kr_ib_ddsrxeq; +}; + +struct ipath_cregs { + ipath_creg cr_badformatcnt; + ipath_creg cr_erricrccnt; + ipath_creg cr_errlinkcnt; + ipath_creg cr_errlpcrccnt; + ipath_creg cr_errpkey; + ipath_creg cr_errrcvflowctrlcnt; + ipath_creg cr_err_rlencnt; + ipath_creg cr_errslencnt; + ipath_creg cr_errtidfull; + ipath_creg cr_errtidvalid; + ipath_creg cr_errvcrccnt; + ipath_creg cr_ibstatuschange; + ipath_creg cr_intcnt; + ipath_creg cr_invalidrlencnt; + ipath_creg cr_invalidslencnt; + ipath_creg cr_lbflowstallcnt; + ipath_creg cr_iblinkdowncnt; + ipath_creg cr_iblinkerrrecovcnt; + ipath_creg cr_ibsymbolerrcnt; + ipath_creg cr_pktrcvcnt; + ipath_creg cr_pktrcvflowctrlcnt; + ipath_creg cr_pktsendcnt; + ipath_creg cr_pktsendflowcnt; + ipath_creg cr_portovflcnt; + ipath_creg cr_rcvebpcnt; + ipath_creg cr_rcvovflcnt; + ipath_creg cr_rxdroppktcnt; + ipath_creg cr_senddropped; + ipath_creg cr_sendstallcnt; + ipath_creg cr_sendunderruncnt; + ipath_creg cr_unsupvlcnt; + ipath_creg cr_wordrcvcnt; + ipath_creg cr_wordsendcnt; + ipath_creg cr_vl15droppedpktcnt; + ipath_creg cr_rxotherlocalphyerrcnt; + ipath_creg cr_excessbufferovflcnt; + ipath_creg cr_locallinkintegrityerrcnt; + ipath_creg cr_rxvlerrcnt; + ipath_creg cr_rxdlidfltrcnt; + ipath_creg cr_psstat; + ipath_creg cr_psstart; + ipath_creg cr_psinterval; + ipath_creg cr_psrcvdatacount; + ipath_creg cr_psrcvpktscount; + ipath_creg cr_psxmitdatacount; + ipath_creg cr_psxmitpktscount; + ipath_creg cr_psxmitwaitcount; +}; + +#endif /* _IPATH_REGISTERS_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c new file mode 100644 index 00000000..1f95bbaf --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -0,0 +1,734 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "ipath_verbs.h" +#include "ipath_kernel.h" + +/* + * Convert the AETH RNR timeout code into the number of milliseconds. + */ +const u32 ib_ipath_rnr_table[32] = { + 656, /* 0 */ + 1, /* 1 */ + 1, /* 2 */ + 1, /* 3 */ + 1, /* 4 */ + 1, /* 5 */ + 1, /* 6 */ + 1, /* 7 */ + 1, /* 8 */ + 1, /* 9 */ + 1, /* A */ + 1, /* B */ + 1, /* C */ + 1, /* D */ + 2, /* E */ + 2, /* F */ + 3, /* 10 */ + 4, /* 11 */ + 6, /* 12 */ + 8, /* 13 */ + 11, /* 14 */ + 16, /* 15 */ + 21, /* 16 */ + 31, /* 17 */ + 41, /* 18 */ + 62, /* 19 */ + 82, /* 1A */ + 123, /* 1B */ + 164, /* 1C */ + 246, /* 1D */ + 328, /* 1E */ + 492 /* 1F */ +}; + +/** + * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device + * @qp: the QP + * + * Called with the QP s_lock held and interrupts disabled. + * XXX Use a simple list for now. We might need a priority + * queue if we have lots of QPs waiting for RNR timeouts + * but that should be rare. + */ +void ipath_insert_rnr_queue(struct ipath_qp *qp) +{ + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + + /* We already did a spin_lock_irqsave(), so just use spin_lock */ + spin_lock(&dev->pending_lock); + if (list_empty(&dev->rnrwait)) + list_add(&qp->timerwait, &dev->rnrwait); + else { + struct list_head *l = &dev->rnrwait; + struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp, + timerwait); + + while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) { + qp->s_rnr_timeout -= nqp->s_rnr_timeout; + l = l->next; + if (l->next == &dev->rnrwait) { + nqp = NULL; + break; + } + nqp = list_entry(l->next, struct ipath_qp, + timerwait); + } + if (nqp) + nqp->s_rnr_timeout -= qp->s_rnr_timeout; + list_add(&qp->timerwait, l); + } + spin_unlock(&dev->pending_lock); +} + +/** + * ipath_init_sge - Validate a RWQE and fill in the SGE state + * @qp: the QP + * + * Return 1 if OK. + */ +int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, + u32 *lengthp, struct ipath_sge_state *ss) +{ + int i, j, ret; + struct ib_wc wc; + + *lengthp = 0; + for (i = j = 0; i < wqe->num_sge; i++) { + if (wqe->sg_list[i].length == 0) + continue; + /* Check LKEY */ + if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge, + &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + goto bad_lkey; + *lengthp += wqe->sg_list[i].length; + j++; + } + ss->num_sge = j; + ret = 1; + goto bail; + +bad_lkey: + memset(&wc, 0, sizeof(wc)); + wc.wr_id = wqe->wr_id; + wc.status = IB_WC_LOC_PROT_ERR; + wc.opcode = IB_WC_RECV; + wc.qp = &qp->ibqp; + /* Signal solicited completion event. */ + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + ret = 0; +bail: + return ret; +} + +/** + * ipath_get_rwqe - copy the next RWQE into the QP's RWQE + * @qp: the QP + * @wr_id_only: update qp->r_wr_id only, not qp->r_sge + * + * Return 0 if no RWQE is available, otherwise return 1. + * + * Can be called from interrupt level. + */ +int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) +{ + unsigned long flags; + struct ipath_rq *rq; + struct ipath_rwq *wq; + struct ipath_srq *srq; + struct ipath_rwqe *wqe; + void (*handler)(struct ib_event *, void *); + u32 tail; + int ret; + + if (qp->ibqp.srq) { + srq = to_isrq(qp->ibqp.srq); + handler = srq->ibsrq.event_handler; + rq = &srq->rq; + } else { + srq = NULL; + handler = NULL; + rq = &qp->r_rq; + } + + spin_lock_irqsave(&rq->lock, flags); + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { + ret = 0; + goto unlock; + } + + wq = rq->wq; + tail = wq->tail; + /* Validate tail before using it since it is user writable. */ + if (tail >= rq->size) + tail = 0; + do { + if (unlikely(tail == wq->head)) { + ret = 0; + goto unlock; + } + /* Make sure entry is read after head index is read. */ + smp_rmb(); + wqe = get_rwqe_ptr(rq, tail); + if (++tail >= rq->size) + tail = 0; + if (wr_id_only) + break; + qp->r_sge.sg_list = qp->r_sg_list; + } while (!ipath_init_sge(qp, wqe, &qp->r_len, &qp->r_sge)); + qp->r_wr_id = wqe->wr_id; + wq->tail = tail; + + ret = 1; + set_bit(IPATH_R_WRID_VALID, &qp->r_aflags); + if (handler) { + u32 n; + + /* + * validate head pointer value and compute + * the number of remaining WQEs. + */ + n = wq->head; + if (n >= rq->size) + n = 0; + if (n < tail) + n += rq->size - tail; + else + n -= tail; + if (n < srq->limit) { + struct ib_event ev; + + srq->limit = 0; + spin_unlock_irqrestore(&rq->lock, flags); + ev.device = qp->ibqp.device; + ev.element.srq = qp->ibqp.srq; + ev.event = IB_EVENT_SRQ_LIMIT_REACHED; + handler(&ev, srq->ibsrq.srq_context); + goto bail; + } + } +unlock: + spin_unlock_irqrestore(&rq->lock, flags); +bail: + return ret; +} + +/** + * ipath_ruc_loopback - handle UC and RC lookback requests + * @sqp: the sending QP + * + * This is called from ipath_do_send() to + * forward a WQE addressed to the same HCA. + * Note that although we are single threaded due to the tasklet, we still + * have to protect against post_send(). We don't have to worry about + * receive interrupts since this is a connected protocol and all packets + * will pass through here. + */ +static void ipath_ruc_loopback(struct ipath_qp *sqp) +{ + struct ipath_ibdev *dev = to_idev(sqp->ibqp.device); + struct ipath_qp *qp; + struct ipath_swqe *wqe; + struct ipath_sge *sge; + unsigned long flags; + struct ib_wc wc; + u64 sdata; + atomic64_t *maddr; + enum ib_wc_status send_status; + + /* + * Note that we check the responder QP state after + * checking the requester's state. + */ + qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); + + spin_lock_irqsave(&sqp->s_lock, flags); + + /* Return if we are already busy processing a work request. */ + if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || + !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) + goto unlock; + + sqp->s_flags |= IPATH_S_BUSY; + +again: + if (sqp->s_last == sqp->s_head) + goto clr_busy; + wqe = get_swqe_ptr(sqp, sqp->s_last); + + /* Return if it is not OK to start a new work reqeust. */ + if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { + if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND)) + goto clr_busy; + /* We are in the error state, flush the work request. */ + send_status = IB_WC_WR_FLUSH_ERR; + goto flush_send; + } + + /* + * We can rely on the entry not changing without the s_lock + * being held until we update s_last. + * We increment s_cur to indicate s_last is in progress. + */ + if (sqp->s_last == sqp->s_cur) { + if (++sqp->s_cur >= sqp->s_size) + sqp->s_cur = 0; + } + spin_unlock_irqrestore(&sqp->s_lock, flags); + + if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { + dev->n_pkt_drops++; + /* + * For RC, the requester would timeout and retry so + * shortcut the timeouts and just signal too many retries. + */ + if (sqp->ibqp.qp_type == IB_QPT_RC) + send_status = IB_WC_RETRY_EXC_ERR; + else + send_status = IB_WC_SUCCESS; + goto serr; + } + + memset(&wc, 0, sizeof wc); + send_status = IB_WC_SUCCESS; + + sqp->s_sge.sge = wqe->sg_list[0]; + sqp->s_sge.sg_list = wqe->sg_list + 1; + sqp->s_sge.num_sge = wqe->wr.num_sge; + sqp->s_len = wqe->length; + switch (wqe->wr.opcode) { + case IB_WR_SEND_WITH_IMM: + wc.wc_flags = IB_WC_WITH_IMM; + wc.ex.imm_data = wqe->wr.ex.imm_data; + /* FALLTHROUGH */ + case IB_WR_SEND: + if (!ipath_get_rwqe(qp, 0)) + goto rnr_nak; + break; + + case IB_WR_RDMA_WRITE_WITH_IMM: + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) + goto inv_err; + wc.wc_flags = IB_WC_WITH_IMM; + wc.ex.imm_data = wqe->wr.ex.imm_data; + if (!ipath_get_rwqe(qp, 1)) + goto rnr_nak; + /* FALLTHROUGH */ + case IB_WR_RDMA_WRITE: + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) + goto inv_err; + if (wqe->length == 0) + break; + if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, + wqe->wr.wr.rdma.remote_addr, + wqe->wr.wr.rdma.rkey, + IB_ACCESS_REMOTE_WRITE))) + goto acc_err; + break; + + case IB_WR_RDMA_READ: + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) + goto inv_err; + if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, + wqe->wr.wr.rdma.remote_addr, + wqe->wr.wr.rdma.rkey, + IB_ACCESS_REMOTE_READ))) + goto acc_err; + qp->r_sge.sge = wqe->sg_list[0]; + qp->r_sge.sg_list = wqe->sg_list + 1; + qp->r_sge.num_sge = wqe->wr.num_sge; + break; + + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) + goto inv_err; + if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), + wqe->wr.wr.atomic.remote_addr, + wqe->wr.wr.atomic.rkey, + IB_ACCESS_REMOTE_ATOMIC))) + goto acc_err; + /* Perform atomic OP and save result. */ + maddr = (atomic64_t *) qp->r_sge.sge.vaddr; + sdata = wqe->wr.wr.atomic.compare_add; + *(u64 *) sqp->s_sge.sge.vaddr = + (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? + (u64) atomic64_add_return(sdata, maddr) - sdata : + (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + sdata, wqe->wr.wr.atomic.swap); + goto send_comp; + + default: + send_status = IB_WC_LOC_QP_OP_ERR; + goto serr; + } + + sge = &sqp->s_sge.sge; + while (sqp->s_len) { + u32 len = sqp->s_len; + + if (len > sge->length) + len = sge->length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + ipath_copy_sge(&qp->r_sge, sge->vaddr, len); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (--sqp->s_sge.num_sge) + *sge = *sqp->s_sge.sg_list++; + } else if (sge->length == 0 && sge->mr != NULL) { + if (++sge->n >= IPATH_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + sqp->s_len -= len; + } + + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) + goto send_comp; + + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + else + wc.opcode = IB_WC_RECV; + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; + wc.byte_len = wqe->length; + wc.qp = &qp->ibqp; + wc.src_qp = qp->remote_qpn; + wc.slid = qp->remote_ah_attr.dlid; + wc.sl = qp->remote_ah_attr.sl; + wc.port_num = 1; + /* Signal completion event if the solicited bit is set. */ + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + wqe->wr.send_flags & IB_SEND_SOLICITED); + +send_comp: + spin_lock_irqsave(&sqp->s_lock, flags); +flush_send: + sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; + ipath_send_complete(sqp, wqe, send_status); + goto again; + +rnr_nak: + /* Handle RNR NAK */ + if (qp->ibqp.qp_type == IB_QPT_UC) + goto send_comp; + /* + * Note: we don't need the s_lock held since the BUSY flag + * makes this single threaded. + */ + if (sqp->s_rnr_retry == 0) { + send_status = IB_WC_RNR_RETRY_EXC_ERR; + goto serr; + } + if (sqp->s_rnr_retry_cnt < 7) + sqp->s_rnr_retry--; + spin_lock_irqsave(&sqp->s_lock, flags); + if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK)) + goto clr_busy; + sqp->s_flags |= IPATH_S_WAITING; + dev->n_rnr_naks++; + sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer]; + ipath_insert_rnr_queue(sqp); + goto clr_busy; + +inv_err: + send_status = IB_WC_REM_INV_REQ_ERR; + wc.status = IB_WC_LOC_QP_OP_ERR; + goto err; + +acc_err: + send_status = IB_WC_REM_ACCESS_ERR; + wc.status = IB_WC_LOC_PROT_ERR; +err: + /* responder goes to error state */ + ipath_rc_error(qp, wc.status); + +serr: + spin_lock_irqsave(&sqp->s_lock, flags); + ipath_send_complete(sqp, wqe, send_status); + if (sqp->ibqp.qp_type == IB_QPT_RC) { + int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + + sqp->s_flags &= ~IPATH_S_BUSY; + spin_unlock_irqrestore(&sqp->s_lock, flags); + if (lastwqe) { + struct ib_event ev; + + ev.device = sqp->ibqp.device; + ev.element.qp = &sqp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context); + } + goto done; + } +clr_busy: + sqp->s_flags &= ~IPATH_S_BUSY; +unlock: + spin_unlock_irqrestore(&sqp->s_lock, flags); +done: + if (qp && atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); +} + +static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) +{ + if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) || + qp->ibqp.qp_type == IB_QPT_SMI) { + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + } +} + +/** + * ipath_no_bufs_available - tell the layer driver we need buffers + * @qp: the QP that caused the problem + * @dev: the device we ran out of buffers on + * + * Called when we run out of PIO buffers. + * If we are now in the error state, return zero to flush the + * send work request. + */ +static int ipath_no_bufs_available(struct ipath_qp *qp, + struct ipath_ibdev *dev) +{ + unsigned long flags; + int ret = 1; + + /* + * Note that as soon as want_buffer() is called and + * possibly before it returns, ipath_ib_piobufavail() + * could be called. Therefore, put QP on the piowait list before + * enabling the PIO avail interrupt. + */ + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { + dev->n_piowait++; + qp->s_flags |= IPATH_S_WAITING; + qp->s_flags &= ~IPATH_S_BUSY; + spin_lock(&dev->pending_lock); + if (list_empty(&qp->piowait)) + list_add_tail(&qp->piowait, &dev->piowait); + spin_unlock(&dev->pending_lock); + } else + ret = 0; + spin_unlock_irqrestore(&qp->s_lock, flags); + if (ret) + want_buffer(dev->dd, qp); + return ret; +} + +/** + * ipath_make_grh - construct a GRH header + * @dev: a pointer to the ipath device + * @hdr: a pointer to the GRH header being constructed + * @grh: the global route address to send to + * @hwords: the number of 32 bit words of header being sent + * @nwords: the number of 32 bit words of data being sent + * + * Return the size of the header in 32 bit words. + */ +u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr, + struct ib_global_route *grh, u32 hwords, u32 nwords) +{ + hdr->version_tclass_flow = + cpu_to_be32((6 << 28) | + (grh->traffic_class << 20) | + grh->flow_label); + hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2); + /* next_hdr is defined by C8-7 in ch. 8.4.1 */ + hdr->next_hdr = 0x1B; + hdr->hop_limit = grh->hop_limit; + /* The SGID is 32-bit aligned. */ + hdr->sgid.global.subnet_prefix = dev->gid_prefix; + hdr->sgid.global.interface_id = dev->dd->ipath_guid; + hdr->dgid = grh->dgid; + + /* GRH header size in 32-bit words. */ + return sizeof(struct ib_grh) / sizeof(u32); +} + +void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp, + struct ipath_other_headers *ohdr, + u32 bth0, u32 bth2) +{ + u16 lrh0; + u32 nwords; + u32 extra_bytes; + + /* Construct the header. */ + extra_bytes = -qp->s_cur_size & 3; + nwords = (qp->s_cur_size + extra_bytes) >> 2; + lrh0 = IPATH_LRH_BTH; + if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { + qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh, + &qp->remote_ah_attr.grh, + qp->s_hdrwords, nwords); + lrh0 = IPATH_LRH_GRH; + } + lrh0 |= qp->remote_ah_attr.sl << 4; + qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); + qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid | + qp->remote_ah_attr.src_path_bits); + bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index); + bth0 |= extra_bytes << 20; + ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22)); + ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); + ohdr->bth[2] = cpu_to_be32(bth2); +} + +/** + * ipath_do_send - perform a send on a QP + * @data: contains a pointer to the QP + * + * Process entries in the send work queue until credit or queue is + * exhausted. Only allow one CPU to send a packet per QP (tasklet). + * Otherwise, two threads could send packets out of order. + */ +void ipath_do_send(unsigned long data) +{ + struct ipath_qp *qp = (struct ipath_qp *)data; + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + int (*make_req)(struct ipath_qp *qp); + unsigned long flags; + + if ((qp->ibqp.qp_type == IB_QPT_RC || + qp->ibqp.qp_type == IB_QPT_UC) && + qp->remote_ah_attr.dlid == dev->dd->ipath_lid) { + ipath_ruc_loopback(qp); + goto bail; + } + + if (qp->ibqp.qp_type == IB_QPT_RC) + make_req = ipath_make_rc_req; + else if (qp->ibqp.qp_type == IB_QPT_UC) + make_req = ipath_make_uc_req; + else + make_req = ipath_make_ud_req; + + spin_lock_irqsave(&qp->s_lock, flags); + + /* Return if we are already busy processing a work request. */ + if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || + !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) { + spin_unlock_irqrestore(&qp->s_lock, flags); + goto bail; + } + + qp->s_flags |= IPATH_S_BUSY; + + spin_unlock_irqrestore(&qp->s_lock, flags); + +again: + /* Check for a constructed packet to be sent. */ + if (qp->s_hdrwords != 0) { + /* + * If no PIO bufs are available, return. An interrupt will + * call ipath_ib_piobufavail() when one is available. + */ + if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, + qp->s_cur_sge, qp->s_cur_size)) { + if (ipath_no_bufs_available(qp, dev)) + goto bail; + } + dev->n_unicast_xmit++; + /* Record that we sent the packet and s_hdr is empty. */ + qp->s_hdrwords = 0; + } + + if (make_req(qp)) + goto again; + +bail:; +} + +/* + * This should be called with s_lock held. + */ +void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, + enum ib_wc_status status) +{ + u32 old_last, last; + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) + return; + + /* See ch. 11.2.4.1 and 10.7.3.1 */ + if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED) || + status != IB_WC_SUCCESS) { + struct ib_wc wc; + + memset(&wc, 0, sizeof wc); + wc.wr_id = wqe->wr.wr_id; + wc.status = status; + wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; + wc.qp = &qp->ibqp; + if (status == IB_WC_SUCCESS) + wc.byte_len = wqe->length; + ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, + status != IB_WC_SUCCESS); + } + + old_last = last = qp->s_last; + if (++last >= qp->s_size) + last = 0; + qp->s_last = last; + if (qp->s_cur == old_last) + qp->s_cur = last; + if (qp->s_tail == old_last) + qp->s_tail = last; + if (qp->state == IB_QPS_SQD && last == qp->s_cur) + qp->s_draining = 0; +} diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c new file mode 100644 index 00000000..98ac18ec --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -0,0 +1,818 @@ +/* + * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "ipath_kernel.h" +#include "ipath_verbs.h" +#include "ipath_common.h" + +#define SDMA_DESCQ_SZ PAGE_SIZE /* 256 entries per 4KB page */ + +static void vl15_watchdog_enq(struct ipath_devdata *dd) +{ + /* ipath_sdma_lock must already be held */ + if (atomic_inc_return(&dd->ipath_sdma_vl15_count) == 1) { + unsigned long interval = (HZ + 19) / 20; + dd->ipath_sdma_vl15_timer.expires = jiffies + interval; + add_timer(&dd->ipath_sdma_vl15_timer); + } +} + +static void vl15_watchdog_deq(struct ipath_devdata *dd) +{ + /* ipath_sdma_lock must already be held */ + if (atomic_dec_return(&dd->ipath_sdma_vl15_count) != 0) { + unsigned long interval = (HZ + 19) / 20; + mod_timer(&dd->ipath_sdma_vl15_timer, jiffies + interval); + } else { + del_timer(&dd->ipath_sdma_vl15_timer); + } +} + +static void vl15_watchdog_timeout(unsigned long opaque) +{ + struct ipath_devdata *dd = (struct ipath_devdata *)opaque; + + if (atomic_read(&dd->ipath_sdma_vl15_count) != 0) { + ipath_dbg("vl15 watchdog timeout - clearing\n"); + ipath_cancel_sends(dd, 1); + ipath_hol_down(dd); + } else { + ipath_dbg("vl15 watchdog timeout - " + "condition already cleared\n"); + } +} + +static void unmap_desc(struct ipath_devdata *dd, unsigned head) +{ + __le64 *descqp = &dd->ipath_sdma_descq[head].qw[0]; + u64 desc[2]; + dma_addr_t addr; + size_t len; + + desc[0] = le64_to_cpu(descqp[0]); + desc[1] = le64_to_cpu(descqp[1]); + + addr = (desc[1] << 32) | (desc[0] >> 32); + len = (desc[0] >> 14) & (0x7ffULL << 2); + dma_unmap_single(&dd->pcidev->dev, addr, len, DMA_TO_DEVICE); +} + +/* + * ipath_sdma_lock should be locked before calling this. + */ +int ipath_sdma_make_progress(struct ipath_devdata *dd) +{ + struct list_head *lp = NULL; + struct ipath_sdma_txreq *txp = NULL; + u16 dmahead; + u16 start_idx = 0; + int progress = 0; + + if (!list_empty(&dd->ipath_sdma_activelist)) { + lp = dd->ipath_sdma_activelist.next; + txp = list_entry(lp, struct ipath_sdma_txreq, list); + start_idx = txp->start_idx; + } + + /* + * Read the SDMA head register in order to know that the + * interrupt clear has been written to the chip. + * Otherwise, we may not get an interrupt for the last + * descriptor in the queue. + */ + dmahead = (u16)ipath_read_kreg32(dd, dd->ipath_kregs->kr_senddmahead); + /* sanity check return value for error handling (chip reset, etc.) */ + if (dmahead >= dd->ipath_sdma_descq_cnt) + goto done; + + while (dd->ipath_sdma_descq_head != dmahead) { + if (txp && txp->flags & IPATH_SDMA_TXREQ_F_FREEDESC && + dd->ipath_sdma_descq_head == start_idx) { + unmap_desc(dd, dd->ipath_sdma_descq_head); + start_idx++; + if (start_idx == dd->ipath_sdma_descq_cnt) + start_idx = 0; + } + + /* increment free count and head */ + dd->ipath_sdma_descq_removed++; + if (++dd->ipath_sdma_descq_head == dd->ipath_sdma_descq_cnt) + dd->ipath_sdma_descq_head = 0; + + if (txp && txp->next_descq_idx == dd->ipath_sdma_descq_head) { + /* move to notify list */ + if (txp->flags & IPATH_SDMA_TXREQ_F_VL15) + vl15_watchdog_deq(dd); + list_move_tail(lp, &dd->ipath_sdma_notifylist); + if (!list_empty(&dd->ipath_sdma_activelist)) { + lp = dd->ipath_sdma_activelist.next; + txp = list_entry(lp, struct ipath_sdma_txreq, + list); + start_idx = txp->start_idx; + } else { + lp = NULL; + txp = NULL; + } + } + progress = 1; + } + + if (progress) + tasklet_hi_schedule(&dd->ipath_sdma_notify_task); + +done: + return progress; +} + +static void ipath_sdma_notify(struct ipath_devdata *dd, struct list_head *list) +{ + struct ipath_sdma_txreq *txp, *txp_next; + + list_for_each_entry_safe(txp, txp_next, list, list) { + list_del_init(&txp->list); + + if (txp->callback) + (*txp->callback)(txp->callback_cookie, + txp->callback_status); + } +} + +static void sdma_notify_taskbody(struct ipath_devdata *dd) +{ + unsigned long flags; + struct list_head list; + + INIT_LIST_HEAD(&list); + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + + list_splice_init(&dd->ipath_sdma_notifylist, &list); + + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + + ipath_sdma_notify(dd, &list); + + /* + * The IB verbs layer needs to see the callback before getting + * the call to ipath_ib_piobufavail() because the callback + * handles releasing resources the next send will need. + * Otherwise, we could do these calls in + * ipath_sdma_make_progress(). + */ + ipath_ib_piobufavail(dd->verbs_dev); +} + +static void sdma_notify_task(unsigned long opaque) +{ + struct ipath_devdata *dd = (struct ipath_devdata *)opaque; + + if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + sdma_notify_taskbody(dd); +} + +static void dump_sdma_state(struct ipath_devdata *dd) +{ + unsigned long reg; + + reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus); + ipath_cdbg(VERBOSE, "kr_senddmastatus: 0x%016lx\n", reg); + + reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendctrl); + ipath_cdbg(VERBOSE, "kr_sendctrl: 0x%016lx\n", reg); + + reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask0); + ipath_cdbg(VERBOSE, "kr_senddmabufmask0: 0x%016lx\n", reg); + + reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask1); + ipath_cdbg(VERBOSE, "kr_senddmabufmask1: 0x%016lx\n", reg); + + reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask2); + ipath_cdbg(VERBOSE, "kr_senddmabufmask2: 0x%016lx\n", reg); + + reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail); + ipath_cdbg(VERBOSE, "kr_senddmatail: 0x%016lx\n", reg); + + reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead); + ipath_cdbg(VERBOSE, "kr_senddmahead: 0x%016lx\n", reg); +} + +static void sdma_abort_task(unsigned long opaque) +{ + struct ipath_devdata *dd = (struct ipath_devdata *) opaque; + u64 status; + unsigned long flags; + + if (test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + return; + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + + status = dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK; + + /* nothing to do */ + if (status == IPATH_SDMA_ABORT_NONE) + goto unlock; + + /* ipath_sdma_abort() is done, waiting for interrupt */ + if (status == IPATH_SDMA_ABORT_DISARMED) { + if (jiffies < dd->ipath_sdma_abort_intr_timeout) + goto resched_noprint; + /* give up, intr got lost somewhere */ + ipath_dbg("give up waiting for SDMADISABLED intr\n"); + __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); + status = IPATH_SDMA_ABORT_ABORTED; + } + + /* everything is stopped, time to clean up and restart */ + if (status == IPATH_SDMA_ABORT_ABORTED) { + struct ipath_sdma_txreq *txp, *txpnext; + u64 hwstatus; + int notify = 0; + + hwstatus = ipath_read_kreg64(dd, + dd->ipath_kregs->kr_senddmastatus); + + if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG | + IPATH_SDMA_STATUS_ABORT_IN_PROG | + IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) || + !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) { + if (dd->ipath_sdma_reset_wait > 0) { + /* not done shutting down sdma */ + --dd->ipath_sdma_reset_wait; + goto resched; + } + ipath_cdbg(VERBOSE, "gave up waiting for quiescent " + "status after SDMA reset, continuing\n"); + dump_sdma_state(dd); + } + + /* dequeue all "sent" requests */ + list_for_each_entry_safe(txp, txpnext, + &dd->ipath_sdma_activelist, list) { + txp->callback_status = IPATH_SDMA_TXREQ_S_ABORTED; + if (txp->flags & IPATH_SDMA_TXREQ_F_VL15) + vl15_watchdog_deq(dd); + list_move_tail(&txp->list, &dd->ipath_sdma_notifylist); + notify = 1; + } + if (notify) + tasklet_hi_schedule(&dd->ipath_sdma_notify_task); + + /* reset our notion of head and tail */ + dd->ipath_sdma_descq_tail = 0; + dd->ipath_sdma_descq_head = 0; + dd->ipath_sdma_head_dma[0] = 0; + dd->ipath_sdma_generation = 0; + dd->ipath_sdma_descq_removed = dd->ipath_sdma_descq_added; + + /* Reset SendDmaLenGen */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, + (u64) dd->ipath_sdma_descq_cnt | (1ULL << 18)); + + /* done with sdma state for a bit */ + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + + /* + * Don't restart sdma here (with the exception + * below). Wait until link is up to ACTIVE. VL15 MADs + * used to bring the link up use PIO, and multiple link + * transitions otherwise cause the sdma engine to be + * stopped and started multiple times. + * The disable is done here, including the shadow, + * so the state is kept consistent. + * See ipath_restart_sdma() for the actual starting + * of sdma. + */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + + /* make sure I see next message */ + dd->ipath_sdma_abort_jiffies = 0; + + /* + * Not everything that takes SDMA offline is a link + * status change. If the link was up, restart SDMA. + */ + if (dd->ipath_flags & IPATH_LINKACTIVE) + ipath_restart_sdma(dd); + + goto done; + } + +resched: + /* + * for now, keep spinning + * JAG - this is bad to just have default be a loop without + * state change + */ + if (jiffies > dd->ipath_sdma_abort_jiffies) { + ipath_dbg("looping with status 0x%08lx\n", + dd->ipath_sdma_status); + dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ; + } +resched_noprint: + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + tasklet_hi_schedule(&dd->ipath_sdma_abort_task); + return; + +unlock: + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); +done: + return; +} + +/* + * This is called from interrupt context. + */ +void ipath_sdma_intr(struct ipath_devdata *dd) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + + (void) ipath_sdma_make_progress(dd); + + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); +} + +static int alloc_sdma(struct ipath_devdata *dd) +{ + int ret = 0; + + /* Allocate memory for SendDMA descriptor FIFO */ + dd->ipath_sdma_descq = dma_alloc_coherent(&dd->pcidev->dev, + SDMA_DESCQ_SZ, &dd->ipath_sdma_descq_phys, GFP_KERNEL); + + if (!dd->ipath_sdma_descq) { + ipath_dev_err(dd, "failed to allocate SendDMA descriptor " + "FIFO memory\n"); + ret = -ENOMEM; + goto done; + } + + dd->ipath_sdma_descq_cnt = + SDMA_DESCQ_SZ / sizeof(struct ipath_sdma_desc); + + /* Allocate memory for DMA of head register to memory */ + dd->ipath_sdma_head_dma = dma_alloc_coherent(&dd->pcidev->dev, + PAGE_SIZE, &dd->ipath_sdma_head_phys, GFP_KERNEL); + if (!dd->ipath_sdma_head_dma) { + ipath_dev_err(dd, "failed to allocate SendDMA head memory\n"); + ret = -ENOMEM; + goto cleanup_descq; + } + dd->ipath_sdma_head_dma[0] = 0; + + init_timer(&dd->ipath_sdma_vl15_timer); + dd->ipath_sdma_vl15_timer.function = vl15_watchdog_timeout; + dd->ipath_sdma_vl15_timer.data = (unsigned long)dd; + atomic_set(&dd->ipath_sdma_vl15_count, 0); + + goto done; + +cleanup_descq: + dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ, + (void *)dd->ipath_sdma_descq, dd->ipath_sdma_descq_phys); + dd->ipath_sdma_descq = NULL; + dd->ipath_sdma_descq_phys = 0; +done: + return ret; +} + +int setup_sdma(struct ipath_devdata *dd) +{ + int ret = 0; + unsigned i, n; + u64 tmp64; + u64 senddmabufmask[3] = { 0 }; + unsigned long flags; + + ret = alloc_sdma(dd); + if (ret) + goto done; + + if (!dd->ipath_sdma_descq) { + ipath_dev_err(dd, "SendDMA memory not allocated\n"); + goto done; + } + + /* + * Set initial status as if we had been up, then gone down. + * This lets initial start on transition to ACTIVE be the + * same as restart after link flap. + */ + dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED; + dd->ipath_sdma_abort_jiffies = 0; + dd->ipath_sdma_generation = 0; + dd->ipath_sdma_descq_tail = 0; + dd->ipath_sdma_descq_head = 0; + dd->ipath_sdma_descq_removed = 0; + dd->ipath_sdma_descq_added = 0; + + /* Set SendDmaBase */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, + dd->ipath_sdma_descq_phys); + /* Set SendDmaLenGen */ + tmp64 = dd->ipath_sdma_descq_cnt; + tmp64 |= 1<<18; /* enable generation checking */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, tmp64); + /* Set SendDmaTail */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, + dd->ipath_sdma_descq_tail); + /* Set SendDmaHeadAddr */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, + dd->ipath_sdma_head_phys); + + /* + * Reserve all the former "kernel" piobufs, using high number range + * so we get as many 4K buffers as possible + */ + n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; + i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved; + ipath_chg_pioavailkernel(dd, i, n - i , 0); + for (; i < n; ++i) { + unsigned word = i / 64; + unsigned bit = i & 63; + BUG_ON(word >= 3); + senddmabufmask[word] |= 1ULL << bit; + } + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, + senddmabufmask[0]); + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, + senddmabufmask[1]); + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, + senddmabufmask[2]); + + INIT_LIST_HEAD(&dd->ipath_sdma_activelist); + INIT_LIST_HEAD(&dd->ipath_sdma_notifylist); + + tasklet_init(&dd->ipath_sdma_notify_task, sdma_notify_task, + (unsigned long) dd); + tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task, + (unsigned long) dd); + + /* + * No use to turn on SDMA here, as link is probably not ACTIVE + * Just mark it RUNNING and enable the interrupt, and let the + * ipath_restart_sdma() on link transition to ACTIVE actually + * enable it. + */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + __set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + +done: + return ret; +} + +void teardown_sdma(struct ipath_devdata *dd) +{ + struct ipath_sdma_txreq *txp, *txpnext; + unsigned long flags; + dma_addr_t sdma_head_phys = 0; + dma_addr_t sdma_descq_phys = 0; + void *sdma_descq = NULL; + void *sdma_head_dma = NULL; + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + __clear_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status); + __set_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); + __set_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + + tasklet_kill(&dd->ipath_sdma_abort_task); + tasklet_kill(&dd->ipath_sdma_notify_task); + + /* turn off sdma */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + /* dequeue all "sent" requests */ + list_for_each_entry_safe(txp, txpnext, &dd->ipath_sdma_activelist, + list) { + txp->callback_status = IPATH_SDMA_TXREQ_S_SHUTDOWN; + if (txp->flags & IPATH_SDMA_TXREQ_F_VL15) + vl15_watchdog_deq(dd); + list_move_tail(&txp->list, &dd->ipath_sdma_notifylist); + } + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + + sdma_notify_taskbody(dd); + + del_timer_sync(&dd->ipath_sdma_vl15_timer); + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + + dd->ipath_sdma_abort_jiffies = 0; + + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, 0); + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, 0); + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, 0); + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 0); + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 0); + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 0); + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, 0); + + if (dd->ipath_sdma_head_dma) { + sdma_head_dma = (void *) dd->ipath_sdma_head_dma; + sdma_head_phys = dd->ipath_sdma_head_phys; + dd->ipath_sdma_head_dma = NULL; + dd->ipath_sdma_head_phys = 0; + } + + if (dd->ipath_sdma_descq) { + sdma_descq = dd->ipath_sdma_descq; + sdma_descq_phys = dd->ipath_sdma_descq_phys; + dd->ipath_sdma_descq = NULL; + dd->ipath_sdma_descq_phys = 0; + } + + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + + if (sdma_head_dma) + dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, + sdma_head_dma, sdma_head_phys); + + if (sdma_descq) + dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ, + sdma_descq, sdma_descq_phys); +} + +/* + * [Re]start SDMA, if we use it, and it's not already OK. + * This is called on transition to link ACTIVE, either the first or + * subsequent times. + */ +void ipath_restart_sdma(struct ipath_devdata *dd) +{ + unsigned long flags; + int needed = 1; + + if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) + goto bail; + + /* + * First, make sure we should, which is to say, + * check that we are "RUNNING" (not in teardown) + * and not "SHUTDOWN" + */ + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status) + || test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + needed = 0; + else { + __clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); + __clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); + __clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); + } + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + if (!needed) { + ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n", + dd->ipath_sdma_status); + goto bail; + } + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + /* + * First clear, just to be safe. Enable is only done + * in chip on 0->1 transition + */ + dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + + /* notify upper layers */ + ipath_ib_piobufavail(dd->verbs_dev); + +bail: + return; +} + +static inline void make_sdma_desc(struct ipath_devdata *dd, + u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset) +{ + WARN_ON(addr & 3); + /* SDmaPhyAddr[47:32] */ + sdmadesc[1] = addr >> 32; + /* SDmaPhyAddr[31:0] */ + sdmadesc[0] = (addr & 0xfffffffcULL) << 32; + /* SDmaGeneration[1:0] */ + sdmadesc[0] |= (dd->ipath_sdma_generation & 3ULL) << 30; + /* SDmaDwordCount[10:0] */ + sdmadesc[0] |= (dwlen & 0x7ffULL) << 16; + /* SDmaBufOffset[12:2] */ + sdmadesc[0] |= dwoffset & 0x7ffULL; +} + +/* + * This function queues one IB packet onto the send DMA queue per call. + * The caller is responsible for checking: + * 1) The number of send DMA descriptor entries is less than the size of + * the descriptor queue. + * 2) The IB SGE addresses and lengths are 32-bit aligned + * (except possibly the last SGE's length) + * 3) The SGE addresses are suitable for passing to dma_map_single(). + */ +int ipath_sdma_verbs_send(struct ipath_devdata *dd, + struct ipath_sge_state *ss, u32 dwords, + struct ipath_verbs_txreq *tx) +{ + + unsigned long flags; + struct ipath_sge *sge; + int ret = 0; + u16 tail; + __le64 *descqp; + u64 sdmadesc[2]; + u32 dwoffset; + dma_addr_t addr; + + if ((tx->map_len + (dwords<<2)) > dd->ipath_ibmaxlen) { + ipath_dbg("packet size %X > ibmax %X, fail\n", + tx->map_len + (dwords<<2), dd->ipath_ibmaxlen); + ret = -EMSGSIZE; + goto fail; + } + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + +retry: + if (unlikely(test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status))) { + ret = -EBUSY; + goto unlock; + } + + if (tx->txreq.sg_count > ipath_sdma_descq_freecnt(dd)) { + if (ipath_sdma_make_progress(dd)) + goto retry; + ret = -ENOBUFS; + goto unlock; + } + + addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr, + tx->map_len, DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, addr)) + goto ioerr; + + dwoffset = tx->map_len >> 2; + make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0); + + /* SDmaFirstDesc */ + sdmadesc[0] |= 1ULL << 12; + if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF) + sdmadesc[0] |= 1ULL << 14; /* SDmaUseLargeBuf */ + + /* write to the descq */ + tail = dd->ipath_sdma_descq_tail; + descqp = &dd->ipath_sdma_descq[tail].qw[0]; + *descqp++ = cpu_to_le64(sdmadesc[0]); + *descqp++ = cpu_to_le64(sdmadesc[1]); + + if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEDESC) + tx->txreq.start_idx = tail; + + /* increment the tail */ + if (++tail == dd->ipath_sdma_descq_cnt) { + tail = 0; + descqp = &dd->ipath_sdma_descq[0].qw[0]; + ++dd->ipath_sdma_generation; + } + + sge = &ss->sge; + while (dwords) { + u32 dw; + u32 len; + + len = dwords << 2; + if (len > sge->length) + len = sge->length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + dw = (len + 3) >> 2; + addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2, + DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, addr)) + goto unmap; + make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset); + /* SDmaUseLargeBuf has to be set in every descriptor */ + if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF) + sdmadesc[0] |= 1ULL << 14; + /* write to the descq */ + *descqp++ = cpu_to_le64(sdmadesc[0]); + *descqp++ = cpu_to_le64(sdmadesc[1]); + + /* increment the tail */ + if (++tail == dd->ipath_sdma_descq_cnt) { + tail = 0; + descqp = &dd->ipath_sdma_descq[0].qw[0]; + ++dd->ipath_sdma_generation; + } + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr != NULL) { + if (++sge->n >= IPATH_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + + dwoffset += dw; + dwords -= dw; + } + + if (!tail) + descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0]; + descqp -= 2; + /* SDmaLastDesc */ + descqp[0] |= cpu_to_le64(1ULL << 11); + if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) { + /* SDmaIntReq */ + descqp[0] |= cpu_to_le64(1ULL << 15); + } + + /* Commit writes to memory and advance the tail on the chip */ + wmb(); + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail); + + tx->txreq.next_descq_idx = tail; + tx->txreq.callback_status = IPATH_SDMA_TXREQ_S_OK; + dd->ipath_sdma_descq_tail = tail; + dd->ipath_sdma_descq_added += tx->txreq.sg_count; + list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist); + if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15) + vl15_watchdog_enq(dd); + goto unlock; + +unmap: + while (tail != dd->ipath_sdma_descq_tail) { + if (!tail) + tail = dd->ipath_sdma_descq_cnt - 1; + else + tail--; + unmap_desc(dd, tail); + } +ioerr: + ret = -EIO; +unlock: + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); +fail: + return ret; +} diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c new file mode 100644 index 00000000..26271984 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "ipath_verbs.h" + +/** + * ipath_post_srq_receive - post a receive on a shared receive queue + * @ibsrq: the SRQ to post the receive on + * @wr: the list of work requests to post + * @bad_wr: the first WR to cause a problem is put here + * + * This may be called from interrupt context. + */ +int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct ipath_srq *srq = to_isrq(ibsrq); + struct ipath_rwq *wq; + unsigned long flags; + int ret; + + for (; wr; wr = wr->next) { + struct ipath_rwqe *wqe; + u32 next; + int i; + + if ((unsigned) wr->num_sge > srq->rq.max_sge) { + *bad_wr = wr; + ret = -EINVAL; + goto bail; + } + + spin_lock_irqsave(&srq->rq.lock, flags); + wq = srq->rq.wq; + next = wq->head + 1; + if (next >= srq->rq.size) + next = 0; + if (next == wq->tail) { + spin_unlock_irqrestore(&srq->rq.lock, flags); + *bad_wr = wr; + ret = -ENOMEM; + goto bail; + } + + wqe = get_rwqe_ptr(&srq->rq, wq->head); + wqe->wr_id = wr->wr_id; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); + wq->head = next; + spin_unlock_irqrestore(&srq->rq.lock, flags); + } + ret = 0; + +bail: + return ret; +} + +/** + * ipath_create_srq - create a shared receive queue + * @ibpd: the protection domain of the SRQ to create + * @srq_init_attr: the attributes of the SRQ + * @udata: data from libipathverbs when creating a user SRQ + */ +struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) +{ + struct ipath_ibdev *dev = to_idev(ibpd->device); + struct ipath_srq *srq; + u32 sz; + struct ib_srq *ret; + + if (srq_init_attr->srq_type != IB_SRQT_BASIC) { + ret = ERR_PTR(-ENOSYS); + goto done; + } + + if (srq_init_attr->attr.max_wr == 0) { + ret = ERR_PTR(-EINVAL); + goto done; + } + + if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) || + (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) { + ret = ERR_PTR(-EINVAL); + goto done; + } + + srq = kmalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) { + ret = ERR_PTR(-ENOMEM); + goto done; + } + + /* + * Need to use vmalloc() if we want to support large #s of entries. + */ + srq->rq.size = srq_init_attr->attr.max_wr + 1; + srq->rq.max_sge = srq_init_attr->attr.max_sge; + sz = sizeof(struct ib_sge) * srq->rq.max_sge + + sizeof(struct ipath_rwqe); + srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz); + if (!srq->rq.wq) { + ret = ERR_PTR(-ENOMEM); + goto bail_srq; + } + + /* + * Return the address of the RWQ as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + int err; + u32 s = sizeof(struct ipath_rwq) + srq->rq.size * sz; + + srq->ip = + ipath_create_mmap_info(dev, s, + ibpd->uobject->context, + srq->rq.wq); + if (!srq->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wq; + } + + err = ib_copy_to_udata(udata, &srq->ip->offset, + sizeof(srq->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } else + srq->ip = NULL; + + /* + * ib_create_srq() will initialize srq->ibsrq. + */ + spin_lock_init(&srq->rq.lock); + srq->rq.wq->head = 0; + srq->rq.wq->tail = 0; + srq->limit = srq_init_attr->attr.srq_limit; + + spin_lock(&dev->n_srqs_lock); + if (dev->n_srqs_allocated == ib_ipath_max_srqs) { + spin_unlock(&dev->n_srqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_srqs_allocated++; + spin_unlock(&dev->n_srqs_lock); + + if (srq->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + ret = &srq->ibsrq; + goto done; + +bail_ip: + kfree(srq->ip); +bail_wq: + vfree(srq->rq.wq); +bail_srq: + kfree(srq); +done: + return ret; +} + +/** + * ipath_modify_srq - modify a shared receive queue + * @ibsrq: the SRQ to modify + * @attr: the new attributes of the SRQ + * @attr_mask: indicates which attributes to modify + * @udata: user data for ipathverbs.so + */ +int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, + struct ib_udata *udata) +{ + struct ipath_srq *srq = to_isrq(ibsrq); + struct ipath_rwq *wq; + int ret = 0; + + if (attr_mask & IB_SRQ_MAX_WR) { + struct ipath_rwq *owq; + struct ipath_rwqe *p; + u32 sz, size, n, head, tail; + + /* Check that the requested sizes are below the limits. */ + if ((attr->max_wr > ib_ipath_max_srq_wrs) || + ((attr_mask & IB_SRQ_LIMIT) ? + attr->srq_limit : srq->limit) > attr->max_wr) { + ret = -EINVAL; + goto bail; + } + + sz = sizeof(struct ipath_rwqe) + + srq->rq.max_sge * sizeof(struct ib_sge); + size = attr->max_wr + 1; + wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz); + if (!wq) { + ret = -ENOMEM; + goto bail; + } + + /* Check that we can write the offset to mmap. */ + if (udata && udata->inlen >= sizeof(__u64)) { + __u64 offset_addr; + __u64 offset = 0; + + ret = ib_copy_from_udata(&offset_addr, udata, + sizeof(offset_addr)); + if (ret) + goto bail_free; + udata->outbuf = + (void __user *) (unsigned long) offset_addr; + ret = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (ret) + goto bail_free; + } + + spin_lock_irq(&srq->rq.lock); + /* + * validate head pointer value and compute + * the number of remaining WQEs. + */ + owq = srq->rq.wq; + head = owq->head; + if (head >= srq->rq.size) + head = 0; + tail = owq->tail; + if (tail >= srq->rq.size) + tail = 0; + n = head; + if (n < tail) + n += srq->rq.size - tail; + else + n -= tail; + if (size <= n) { + ret = -EINVAL; + goto bail_unlock; + } + n = 0; + p = wq->wq; + while (tail != head) { + struct ipath_rwqe *wqe; + int i; + + wqe = get_rwqe_ptr(&srq->rq, tail); + p->wr_id = wqe->wr_id; + p->num_sge = wqe->num_sge; + for (i = 0; i < wqe->num_sge; i++) + p->sg_list[i] = wqe->sg_list[i]; + n++; + p = (struct ipath_rwqe *)((char *) p + sz); + if (++tail >= srq->rq.size) + tail = 0; + } + srq->rq.wq = wq; + srq->rq.size = size; + wq->head = n; + wq->tail = 0; + if (attr_mask & IB_SRQ_LIMIT) + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); + + vfree(owq); + + if (srq->ip) { + struct ipath_mmap_info *ip = srq->ip; + struct ipath_ibdev *dev = to_idev(srq->ibsrq.device); + u32 s = sizeof(struct ipath_rwq) + size * sz; + + ipath_update_mmap_info(dev, ip, s, wq); + + /* + * Return the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->inlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + + spin_lock_irq(&dev->pending_lock); + if (list_empty(&ip->pending_mmaps)) + list_add(&ip->pending_mmaps, + &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + } else if (attr_mask & IB_SRQ_LIMIT) { + spin_lock_irq(&srq->rq.lock); + if (attr->srq_limit >= srq->rq.size) + ret = -EINVAL; + else + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); + } + goto bail; + +bail_unlock: + spin_unlock_irq(&srq->rq.lock); +bail_free: + vfree(wq); +bail: + return ret; +} + +int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) +{ + struct ipath_srq *srq = to_isrq(ibsrq); + + attr->max_wr = srq->rq.size - 1; + attr->max_sge = srq->rq.max_sge; + attr->srq_limit = srq->limit; + return 0; +} + +/** + * ipath_destroy_srq - destroy a shared receive queue + * @ibsrq: the SRQ to destroy + */ +int ipath_destroy_srq(struct ib_srq *ibsrq) +{ + struct ipath_srq *srq = to_isrq(ibsrq); + struct ipath_ibdev *dev = to_idev(ibsrq->device); + + spin_lock(&dev->n_srqs_lock); + dev->n_srqs_allocated--; + spin_unlock(&dev->n_srqs_lock); + if (srq->ip) + kref_put(&srq->ip->ref, ipath_release_mmap_info); + else + vfree(srq->rq.wq); + kfree(srq); + + return 0; +} diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c new file mode 100644 index 00000000..f63e143e --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ipath_kernel.h" + +struct infinipath_stats ipath_stats; + +/** + * ipath_snap_cntr - snapshot a chip counter + * @dd: the infinipath device + * @creg: the counter to snapshot + * + * called from add_timer and user counter read calls, to deal with + * counters that wrap in "human time". The words sent and received, and + * the packets sent and received are all that we worry about. For now, + * at least, we don't worry about error counters, because if they wrap + * that quickly, we probably don't care. We may eventually just make this + * handle all the counters. word counters can wrap in about 20 seconds + * of full bandwidth traffic, packet counters in a few hours. + */ + +u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg) +{ + u32 val, reg64 = 0; + u64 val64; + unsigned long t0, t1; + u64 ret; + + t0 = jiffies; + /* If fast increment counters are only 32 bits, snapshot them, + * and maintain them as 64bit values in the driver */ + if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) && + (creg == dd->ipath_cregs->cr_wordsendcnt || + creg == dd->ipath_cregs->cr_wordrcvcnt || + creg == dd->ipath_cregs->cr_pktsendcnt || + creg == dd->ipath_cregs->cr_pktrcvcnt)) { + val64 = ipath_read_creg(dd, creg); + val = val64 == ~0ULL ? ~0U : 0; + reg64 = 1; + } else /* val64 just to keep gcc quiet... */ + val64 = val = ipath_read_creg32(dd, creg); + /* + * See if a second has passed. This is just a way to detect things + * that are quite broken. Normally this should take just a few + * cycles (the check is for long enough that we don't care if we get + * pre-empted.) An Opteron HT O read timeout is 4 seconds with + * normal NB values + */ + t1 = jiffies; + if (time_before(t0 + HZ, t1) && val == -1) { + ipath_dev_err(dd, "Error! Read counter 0x%x timed out\n", + creg); + ret = 0ULL; + goto bail; + } + if (reg64) { + ret = val64; + goto bail; + } + + if (creg == dd->ipath_cregs->cr_wordsendcnt) { + if (val != dd->ipath_lastsword) { + dd->ipath_sword += val - dd->ipath_lastsword; + dd->ipath_lastsword = val; + } + val64 = dd->ipath_sword; + } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) { + if (val != dd->ipath_lastrword) { + dd->ipath_rword += val - dd->ipath_lastrword; + dd->ipath_lastrword = val; + } + val64 = dd->ipath_rword; + } else if (creg == dd->ipath_cregs->cr_pktsendcnt) { + if (val != dd->ipath_lastspkts) { + dd->ipath_spkts += val - dd->ipath_lastspkts; + dd->ipath_lastspkts = val; + } + val64 = dd->ipath_spkts; + } else if (creg == dd->ipath_cregs->cr_pktrcvcnt) { + if (val != dd->ipath_lastrpkts) { + dd->ipath_rpkts += val - dd->ipath_lastrpkts; + dd->ipath_lastrpkts = val; + } + val64 = dd->ipath_rpkts; + } else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) { + if (dd->ibdeltainprog) + val64 -= val64 - dd->ibsymsnap; + val64 -= dd->ibsymdelta; + } else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) { + if (dd->ibdeltainprog) + val64 -= val64 - dd->iblnkerrsnap; + val64 -= dd->iblnkerrdelta; + } else + val64 = (u64) val; + + ret = val64; + +bail: + return ret; +} + +/** + * ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports + * @dd: the infinipath device + * + * print the delta of egrfull/hdrqfull errors for kernel ports no more than + * every 5 seconds. User processes are printed at close, but kernel doesn't + * close, so... Separate routine so may call from other places someday, and + * so function name when printed by _IPATH_INFO is meaningfull + */ +static void ipath_qcheck(struct ipath_devdata *dd) +{ + static u64 last_tot_hdrqfull; + struct ipath_portdata *pd = dd->ipath_pd[0]; + size_t blen = 0; + char buf[128]; + u32 hdrqtail; + + *buf = 0; + if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) { + blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u", + pd->port_hdrqfull - + dd->ipath_p0_hdrqfull); + dd->ipath_p0_hdrqfull = pd->port_hdrqfull; + } + if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) { + blen += snprintf(buf + blen, sizeof buf - blen, + "%srcvegrfull %llu", + blen ? ", " : "", + (unsigned long long) + (ipath_stats.sps_etidfull - + dd->ipath_last_tidfull)); + dd->ipath_last_tidfull = ipath_stats.sps_etidfull; + } + + /* + * this is actually the number of hdrq full interrupts, not actual + * events, but at the moment that's mostly what I'm interested in. + * Actual count, etc. is in the counters, if needed. For production + * users this won't ordinarily be printed. + */ + + if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) && + ipath_stats.sps_hdrqfull != last_tot_hdrqfull) { + blen += snprintf(buf + blen, sizeof buf - blen, + "%shdrqfull %llu (all ports)", + blen ? ", " : "", + (unsigned long long) + (ipath_stats.sps_hdrqfull - + last_tot_hdrqfull)); + last_tot_hdrqfull = ipath_stats.sps_hdrqfull; + } + if (blen) + ipath_dbg("%s\n", buf); + + hdrqtail = ipath_get_hdrqtail(pd); + if (pd->port_head != hdrqtail) { + if (dd->ipath_lastport0rcv_cnt == + ipath_stats.sps_port0pkts) { + ipath_cdbg(PKT, "missing rcv interrupts? " + "port0 hd=%x tl=%x; port0pkts %llx; write" + " hd (w/intr)\n", + pd->port_head, hdrqtail, + (unsigned long long) + ipath_stats.sps_port0pkts); + ipath_write_ureg(dd, ur_rcvhdrhead, hdrqtail | + dd->ipath_rhdrhead_intr_off, pd->port_port); + } + dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts; + } +} + +static void ipath_chk_errormask(struct ipath_devdata *dd) +{ + static u32 fixed; + u32 ctrl; + unsigned long errormask; + unsigned long hwerrs; + + if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED)) + return; + + errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask); + + if (errormask == dd->ipath_errormask) + return; + fixed++; + + hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); + ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); + + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, + dd->ipath_errormask); + + if ((hwerrs & dd->ipath_hwerrmask) || + (ctrl & INFINIPATH_C_FREEZEMODE)) { + /* force re-interrupt of pending events, just in case */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL); + ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL); + ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); + dev_info(&dd->pcidev->dev, + "errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n", + fixed, errormask, (unsigned long)dd->ipath_errormask, + ctrl, hwerrs); + } else + ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n", + fixed, errormask, + (unsigned long)dd->ipath_errormask); +} + + +/** + * ipath_get_faststats - get word counters from chip before they overflow + * @opaque - contains a pointer to the infinipath device ipath_devdata + * + * called from add_timer + */ +void ipath_get_faststats(unsigned long opaque) +{ + struct ipath_devdata *dd = (struct ipath_devdata *) opaque; + int i; + static unsigned cnt; + unsigned long flags; + u64 traffic_wds; + + /* + * don't access the chip while running diags, or memory diags can + * fail + */ + if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) || + ipath_diag_inuse) + /* but re-arm the timer, for diags case; won't hurt other */ + goto done; + + /* + * We now try to maintain a "active timer", based on traffic + * exceeding a threshold, so we need to check the word-counts + * even if they are 64-bit. + */ + traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); + spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); + traffic_wds -= dd->ipath_traffic_wds; + dd->ipath_traffic_wds += traffic_wds; + if (traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD) + atomic_add(5, &dd->ipath_active_time); /* S/B #define */ + spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); + + if (dd->ipath_flags & IPATH_32BITCOUNTERS) { + ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); + ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); + } + + ipath_qcheck(dd); + + /* + * deal with repeat error suppression. Doesn't really matter if + * last error was almost a full interval ago, or just a few usecs + * ago; still won't get more than 2 per interval. We may want + * longer intervals for this eventually, could do with mod, counter + * or separate timer. Also see code in ipath_handle_errors() and + * ipath_handle_hwerrors(). + */ + + if (dd->ipath_lasterror) + dd->ipath_lasterror = 0; + if (dd->ipath_lasthwerror) + dd->ipath_lasthwerror = 0; + if (dd->ipath_maskederrs + && time_after(jiffies, dd->ipath_unmasktime)) { + char ebuf[256]; + int iserr; + iserr = ipath_decode_err(dd, ebuf, sizeof ebuf, + dd->ipath_maskederrs); + if (dd->ipath_maskederrs & + ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | + INFINIPATH_E_PKTERRS)) + ipath_dev_err(dd, "Re-enabling masked errors " + "(%s)\n", ebuf); + else { + /* + * rcvegrfull and rcvhdrqfull are "normal", for some + * types of processes (mostly benchmarks) that send + * huge numbers of messages, while not processing + * them. So only complain about these at debug + * level. + */ + if (iserr) + ipath_dbg( + "Re-enabling queue full errors (%s)\n", + ebuf); + else + ipath_cdbg(ERRPKT, "Re-enabling packet" + " problem interrupt (%s)\n", ebuf); + } + + /* re-enable masked errors */ + dd->ipath_errormask |= dd->ipath_maskederrs; + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, + dd->ipath_errormask); + dd->ipath_maskederrs = 0; + } + + /* limit qfull messages to ~one per minute per port */ + if ((++cnt & 0x10)) { + for (i = (int) dd->ipath_cfgports; --i >= 0; ) { + struct ipath_portdata *pd = dd->ipath_pd[i]; + + if (pd && pd->port_lastrcvhdrqtail != -1) + pd->port_lastrcvhdrqtail = -1; + } + } + + ipath_chk_errormask(dd); +done: + mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5); +} diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c new file mode 100644 index 00000000..75558f33 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -0,0 +1,1238 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "ipath_kernel.h" +#include "ipath_verbs.h" +#include "ipath_common.h" + +/** + * ipath_parse_ushort - parse an unsigned short value in an arbitrary base + * @str: the string containing the number + * @valp: where to put the result + * + * returns the number of bytes consumed, or negative value on error + */ +int ipath_parse_ushort(const char *str, unsigned short *valp) +{ + unsigned long val; + char *end; + int ret; + + if (!isdigit(str[0])) { + ret = -EINVAL; + goto bail; + } + + val = simple_strtoul(str, &end, 0); + + if (val > 0xffff) { + ret = -EINVAL; + goto bail; + } + + *valp = val; + + ret = end + 1 - str; + if (ret == 0) + ret = -EINVAL; + +bail: + return ret; +} + +static ssize_t show_version(struct device_driver *dev, char *buf) +{ + /* The string printed here is already newline-terminated. */ + return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version); +} + +static ssize_t show_num_units(struct device_driver *dev, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", + ipath_count_units(NULL, NULL, NULL)); +} + +static ssize_t show_status(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + ssize_t ret; + + if (!dd->ipath_statusp) { + ret = -EINVAL; + goto bail; + } + + ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n", + (unsigned long long) *(dd->ipath_statusp)); + +bail: + return ret; +} + +static const char *ipath_status_str[] = { + "Initted", + "Disabled", + "Admin_Disabled", + "", /* This used to be the old "OIB_SMA" status. */ + "", /* This used to be the old "SMA" status. */ + "Present", + "IB_link_up", + "IB_configured", + "NoIBcable", + "Fatal_Hardware_Error", + NULL, +}; + +static ssize_t show_status_str(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int i, any; + u64 s; + ssize_t ret; + + if (!dd->ipath_statusp) { + ret = -EINVAL; + goto bail; + } + + s = *(dd->ipath_statusp); + *buf = '\0'; + for (any = i = 0; s && ipath_status_str[i]; i++) { + if (s & 1) { + if (any && strlcat(buf, " ", PAGE_SIZE) >= + PAGE_SIZE) + /* overflow */ + break; + if (strlcat(buf, ipath_status_str[i], + PAGE_SIZE) >= PAGE_SIZE) + break; + any = 1; + } + s >>= 1; + } + if (any) + strlcat(buf, "\n", PAGE_SIZE); + + ret = strlen(buf); + +bail: + return ret; +} + +static ssize_t show_boardversion(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + /* The string printed here is already newline-terminated. */ + return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion); +} + +static ssize_t show_localbus_info(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + /* The string printed here is already newline-terminated. */ + return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_lbus_info); +} + +static ssize_t show_lmc(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_lmc); +} + +static ssize_t store_lmc(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + u16 lmc = 0; + int ret; + + ret = ipath_parse_ushort(buf, &lmc); + if (ret < 0) + goto invalid; + + if (lmc > 7) { + ret = -EINVAL; + goto invalid; + } + + ipath_set_lid(dd, dd->ipath_lid, lmc); + + goto bail; +invalid: + ipath_dev_err(dd, "attempt to set invalid LMC %u\n", lmc); +bail: + return ret; +} + +static ssize_t show_lid(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_lid); +} + +static ssize_t store_lid(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + u16 lid = 0; + int ret; + + ret = ipath_parse_ushort(buf, &lid); + if (ret < 0) + goto invalid; + + if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) { + ret = -EINVAL; + goto invalid; + } + + ipath_set_lid(dd, lid, dd->ipath_lmc); + + goto bail; +invalid: + ipath_dev_err(dd, "attempt to set invalid LID 0x%x\n", lid); +bail: + return ret; +} + +static ssize_t show_mlid(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_mlid); +} + +static ssize_t store_mlid(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + u16 mlid; + int ret; + + ret = ipath_parse_ushort(buf, &mlid); + if (ret < 0 || mlid < IPATH_MULTICAST_LID_BASE) + goto invalid; + + dd->ipath_mlid = mlid; + + goto bail; +invalid: + ipath_dev_err(dd, "attempt to set invalid MLID\n"); +bail: + return ret; +} + +static ssize_t show_guid(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + u8 *guid; + + guid = (u8 *) & (dd->ipath_guid); + + return scnprintf(buf, PAGE_SIZE, + "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n", + guid[0], guid[1], guid[2], guid[3], + guid[4], guid[5], guid[6], guid[7]); +} + +static ssize_t store_guid(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + ssize_t ret; + unsigned short guid[8]; + __be64 new_guid; + u8 *ng; + int i; + + if (sscanf(buf, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx", + &guid[0], &guid[1], &guid[2], &guid[3], + &guid[4], &guid[5], &guid[6], &guid[7]) != 8) + goto invalid; + + ng = (u8 *) &new_guid; + + for (i = 0; i < 8; i++) { + if (guid[i] > 0xff) + goto invalid; + ng[i] = guid[i]; + } + + if (new_guid == 0) + goto invalid; + + dd->ipath_guid = new_guid; + dd->ipath_nguid = 1; + if (dd->verbs_dev) + dd->verbs_dev->ibdev.node_guid = new_guid; + + ret = strlen(buf); + goto bail; + +invalid: + ipath_dev_err(dd, "attempt to set invalid GUID\n"); + ret = -EINVAL; + +bail: + return ret; +} + +static ssize_t show_nguid(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid); +} + +static ssize_t show_nports(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + /* Return the number of user ports available. */ + return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1); +} + +static ssize_t show_serial(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + buf[sizeof dd->ipath_serial] = '\0'; + memcpy(buf, dd->ipath_serial, sizeof dd->ipath_serial); + strcat(buf, "\n"); + return strlen(buf); +} + +static ssize_t show_unit(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit); +} + +static ssize_t show_jint_max_packets(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_max_packets); +} + +static ssize_t store_jint_max_packets(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + u16 v = 0; + int ret; + + ret = ipath_parse_ushort(buf, &v); + if (ret < 0) + ipath_dev_err(dd, "invalid jint_max_packets.\n"); + else + dd->ipath_f_config_jint(dd, dd->ipath_jint_idle_ticks, v); + + return ret; +} + +static ssize_t show_jint_idle_ticks(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_idle_ticks); +} + +static ssize_t store_jint_idle_ticks(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + u16 v = 0; + int ret; + + ret = ipath_parse_ushort(buf, &v); + if (ret < 0) + ipath_dev_err(dd, "invalid jint_idle_ticks.\n"); + else + dd->ipath_f_config_jint(dd, v, dd->ipath_jint_max_packets); + + return ret; +} + +#define DEVICE_COUNTER(name, attr) \ + static ssize_t show_counter_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + struct ipath_devdata *dd = dev_get_drvdata(dev); \ + return scnprintf(\ + buf, PAGE_SIZE, "%llu\n", (unsigned long long) \ + ipath_snap_cntr( \ + dd, offsetof(struct infinipath_counters, \ + attr) / sizeof(u64))); \ + } \ + static DEVICE_ATTR(name, S_IRUGO, show_counter_##name, NULL); + +DEVICE_COUNTER(ib_link_downeds, IBLinkDownedCnt); +DEVICE_COUNTER(ib_link_err_recoveries, IBLinkErrRecoveryCnt); +DEVICE_COUNTER(ib_status_changes, IBStatusChangeCnt); +DEVICE_COUNTER(ib_symbol_errs, IBSymbolErrCnt); +DEVICE_COUNTER(lb_flow_stalls, LBFlowStallCnt); +DEVICE_COUNTER(lb_ints, LBIntCnt); +DEVICE_COUNTER(rx_bad_formats, RxBadFormatCnt); +DEVICE_COUNTER(rx_buf_ovfls, RxBufOvflCnt); +DEVICE_COUNTER(rx_data_pkts, RxDataPktCnt); +DEVICE_COUNTER(rx_dropped_pkts, RxDroppedPktCnt); +DEVICE_COUNTER(rx_dwords, RxDwordCnt); +DEVICE_COUNTER(rx_ebps, RxEBPCnt); +DEVICE_COUNTER(rx_flow_ctrl_errs, RxFlowCtrlErrCnt); +DEVICE_COUNTER(rx_flow_pkts, RxFlowPktCnt); +DEVICE_COUNTER(rx_icrc_errs, RxICRCErrCnt); +DEVICE_COUNTER(rx_len_errs, RxLenErrCnt); +DEVICE_COUNTER(rx_link_problems, RxLinkProblemCnt); +DEVICE_COUNTER(rx_lpcrc_errs, RxLPCRCErrCnt); +DEVICE_COUNTER(rx_max_min_len_errs, RxMaxMinLenErrCnt); +DEVICE_COUNTER(rx_p0_hdr_egr_ovfls, RxP0HdrEgrOvflCnt); +DEVICE_COUNTER(rx_p1_hdr_egr_ovfls, RxP1HdrEgrOvflCnt); +DEVICE_COUNTER(rx_p2_hdr_egr_ovfls, RxP2HdrEgrOvflCnt); +DEVICE_COUNTER(rx_p3_hdr_egr_ovfls, RxP3HdrEgrOvflCnt); +DEVICE_COUNTER(rx_p4_hdr_egr_ovfls, RxP4HdrEgrOvflCnt); +DEVICE_COUNTER(rx_p5_hdr_egr_ovfls, RxP5HdrEgrOvflCnt); +DEVICE_COUNTER(rx_p6_hdr_egr_ovfls, RxP6HdrEgrOvflCnt); +DEVICE_COUNTER(rx_p7_hdr_egr_ovfls, RxP7HdrEgrOvflCnt); +DEVICE_COUNTER(rx_p8_hdr_egr_ovfls, RxP8HdrEgrOvflCnt); +DEVICE_COUNTER(rx_pkey_mismatches, RxPKeyMismatchCnt); +DEVICE_COUNTER(rx_tid_full_errs, RxTIDFullErrCnt); +DEVICE_COUNTER(rx_tid_valid_errs, RxTIDValidErrCnt); +DEVICE_COUNTER(rx_vcrc_errs, RxVCRCErrCnt); +DEVICE_COUNTER(tx_data_pkts, TxDataPktCnt); +DEVICE_COUNTER(tx_dropped_pkts, TxDroppedPktCnt); +DEVICE_COUNTER(tx_dwords, TxDwordCnt); +DEVICE_COUNTER(tx_flow_pkts, TxFlowPktCnt); +DEVICE_COUNTER(tx_flow_stalls, TxFlowStallCnt); +DEVICE_COUNTER(tx_len_errs, TxLenErrCnt); +DEVICE_COUNTER(tx_max_min_len_errs, TxMaxMinLenErrCnt); +DEVICE_COUNTER(tx_underruns, TxUnderrunCnt); +DEVICE_COUNTER(tx_unsup_vl_errs, TxUnsupVLErrCnt); + +static struct attribute *dev_counter_attributes[] = { + &dev_attr_ib_link_downeds.attr, + &dev_attr_ib_link_err_recoveries.attr, + &dev_attr_ib_status_changes.attr, + &dev_attr_ib_symbol_errs.attr, + &dev_attr_lb_flow_stalls.attr, + &dev_attr_lb_ints.attr, + &dev_attr_rx_bad_formats.attr, + &dev_attr_rx_buf_ovfls.attr, + &dev_attr_rx_data_pkts.attr, + &dev_attr_rx_dropped_pkts.attr, + &dev_attr_rx_dwords.attr, + &dev_attr_rx_ebps.attr, + &dev_attr_rx_flow_ctrl_errs.attr, + &dev_attr_rx_flow_pkts.attr, + &dev_attr_rx_icrc_errs.attr, + &dev_attr_rx_len_errs.attr, + &dev_attr_rx_link_problems.attr, + &dev_attr_rx_lpcrc_errs.attr, + &dev_attr_rx_max_min_len_errs.attr, + &dev_attr_rx_p0_hdr_egr_ovfls.attr, + &dev_attr_rx_p1_hdr_egr_ovfls.attr, + &dev_attr_rx_p2_hdr_egr_ovfls.attr, + &dev_attr_rx_p3_hdr_egr_ovfls.attr, + &dev_attr_rx_p4_hdr_egr_ovfls.attr, + &dev_attr_rx_p5_hdr_egr_ovfls.attr, + &dev_attr_rx_p6_hdr_egr_ovfls.attr, + &dev_attr_rx_p7_hdr_egr_ovfls.attr, + &dev_attr_rx_p8_hdr_egr_ovfls.attr, + &dev_attr_rx_pkey_mismatches.attr, + &dev_attr_rx_tid_full_errs.attr, + &dev_attr_rx_tid_valid_errs.attr, + &dev_attr_rx_vcrc_errs.attr, + &dev_attr_tx_data_pkts.attr, + &dev_attr_tx_dropped_pkts.attr, + &dev_attr_tx_dwords.attr, + &dev_attr_tx_flow_pkts.attr, + &dev_attr_tx_flow_stalls.attr, + &dev_attr_tx_len_errs.attr, + &dev_attr_tx_max_min_len_errs.attr, + &dev_attr_tx_underruns.attr, + &dev_attr_tx_unsup_vl_errs.attr, + NULL +}; + +static struct attribute_group dev_counter_attr_group = { + .name = "counters", + .attrs = dev_counter_attributes +}; + +static ssize_t store_reset(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + if (count < 5 || memcmp(buf, "reset", 5)) { + ret = -EINVAL; + goto bail; + } + + if (dd->ipath_flags & IPATH_DISABLED) { + /* + * post-reset init would re-enable interrupts, etc. + * so don't allow reset on disabled devices. Not + * perfect error, but about the best choice. + */ + dev_info(dev,"Unit %d is disabled, can't reset\n", + dd->ipath_unit); + ret = -EINVAL; + goto bail; + } + ret = ipath_reset_device(dd->ipath_unit); +bail: + return ret<0 ? ret : count; +} + +static ssize_t store_link_state(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 state; + + ret = ipath_parse_ushort(buf, &state); + if (ret < 0) + goto invalid; + + r = ipath_set_linkstate(dd, state); + if (r < 0) { + ret = r; + goto bail; + } + + goto bail; +invalid: + ipath_dev_err(dd, "attempt to set invalid link state\n"); +bail: + return ret; +} + +static ssize_t show_mtu(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_ibmtu); +} + +static ssize_t store_mtu(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + ssize_t ret; + u16 mtu = 0; + int r; + + ret = ipath_parse_ushort(buf, &mtu); + if (ret < 0) + goto invalid; + + r = ipath_set_mtu(dd, mtu); + if (r < 0) + ret = r; + + goto bail; +invalid: + ipath_dev_err(dd, "attempt to set invalid MTU\n"); +bail: + return ret; +} + +static ssize_t show_enabled(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + return scnprintf(buf, PAGE_SIZE, "%u\n", + (dd->ipath_flags & IPATH_DISABLED) ? 0 : 1); +} + +static ssize_t store_enabled(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + ssize_t ret; + u16 enable = 0; + + ret = ipath_parse_ushort(buf, &enable); + if (ret < 0) { + ipath_dev_err(dd, "attempt to use non-numeric on enable\n"); + goto bail; + } + + if (enable) { + if (!(dd->ipath_flags & IPATH_DISABLED)) + goto bail; + + dev_info(dev, "Enabling unit %d\n", dd->ipath_unit); + /* same as post-reset */ + ret = ipath_init_chip(dd, 1); + if (ret) + ipath_dev_err(dd, "Failed to enable unit %d\n", + dd->ipath_unit); + else { + dd->ipath_flags &= ~IPATH_DISABLED; + *dd->ipath_statusp &= ~IPATH_STATUS_ADMIN_DISABLED; + } + } + else if (!(dd->ipath_flags & IPATH_DISABLED)) { + dev_info(dev, "Disabling unit %d\n", dd->ipath_unit); + ipath_shutdown_device(dd); + dd->ipath_flags |= IPATH_DISABLED; + *dd->ipath_statusp |= IPATH_STATUS_ADMIN_DISABLED; + } + +bail: + return ret; +} + +static ssize_t store_rx_pol_inv(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret < 0) + goto invalid; + + r = ipath_set_rx_pol_inv(dd, val); + if (r < 0) { + ret = r; + goto bail; + } + + goto bail; +invalid: + ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n"); +bail: + return ret; +} + +static ssize_t store_led_override(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret > 0) + ipath_set_led_override(dd, val); + else + ipath_dev_err(dd, "attempt to set invalid LED override\n"); + return ret; +} + +static ssize_t show_logged_errs(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int idx, count; + + /* force consistency with actual EEPROM */ + if (ipath_update_eeprom_log(dd) != 0) + return -ENXIO; + + count = 0; + for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { + count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c", + dd->ipath_eep_st_errs[idx], + idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' '); + } + + return count; +} + +/* + * New sysfs entries to control various IB config. These all turn into + * accesses via ipath_f_get/set_ib_cfg. + * + * Get/Set heartbeat enable. Or of 1=enabled, 2=auto + */ +static ssize_t show_hrtbt_enb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_HRTBT); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_hrtbt_enb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret >= 0 && val > 3) + ret = -EINVAL; + if (ret < 0) { + ipath_dev_err(dd, "attempt to set invalid Heartbeat enable\n"); + goto bail; + } + + /* + * Set the "intentional" heartbeat enable per either of + * "Enable" and "Auto", as these are normally set together. + * This bit is consulted when leaving loopback mode, + * because entering loopback mode overrides it and automatically + * disables heartbeat. + */ + r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, val); + if (r < 0) + ret = r; + else if (val == IPATH_IB_HRTBT_OFF) + dd->ipath_flags |= IPATH_NO_HRTBT; + else + dd->ipath_flags &= ~IPATH_NO_HRTBT; + +bail: + return ret; +} + +/* + * Get/Set Link-widths enabled. Or of 1=1x, 2=4x (this is human/IB centric, + * _not_ the particular encoding of any given chip) + */ +static ssize_t show_lwid_enb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_lwid_enb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret >= 0 && (val == 0 || val > 3)) + ret = -EINVAL; + if (ret < 0) { + ipath_dev_err(dd, + "attempt to set invalid Link Width (enable)\n"); + goto bail; + } + + r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, val); + if (r < 0) + ret = r; + +bail: + return ret; +} + +/* Get current link width */ +static ssize_t show_lwid(struct device *dev, + struct device_attribute *attr, + char *buf) + +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +/* + * Get/Set Link-speeds enabled. Or of 1=SDR 2=DDR. + */ +static ssize_t show_spd_enb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_spd_enb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret >= 0 && (val == 0 || val > (IPATH_IB_SDR | IPATH_IB_DDR))) + ret = -EINVAL; + if (ret < 0) { + ipath_dev_err(dd, + "attempt to set invalid Link Speed (enable)\n"); + goto bail; + } + + r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, val); + if (r < 0) + ret = r; + +bail: + return ret; +} + +/* Get current link speed */ +static ssize_t show_spd(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +/* + * Get/Set RX polarity-invert enable. 0=no, 1=yes. + */ +static ssize_t show_rx_polinv_enb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_rx_polinv_enb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret >= 0 && val > 1) { + ipath_dev_err(dd, + "attempt to set invalid Rx Polarity (enable)\n"); + ret = -EINVAL; + goto bail; + } + + r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val); + if (r < 0) + ret = r; + +bail: + return ret; +} + +/* + * Get/Set RX lane-reversal enable. 0=no, 1=yes. + */ +static ssize_t show_lanerev_enb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_lanerev_enb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret >= 0 && val > 1) { + ret = -EINVAL; + ipath_dev_err(dd, + "attempt to set invalid Lane reversal (enable)\n"); + goto bail; + } + + r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB, val); + if (r < 0) + ret = r; + +bail: + return ret; +} + +static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); +static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); + +static struct attribute *driver_attributes[] = { + &driver_attr_num_units.attr, + &driver_attr_version.attr, + NULL +}; + +static struct attribute_group driver_attr_group = { + .attrs = driver_attributes +}; + +static ssize_t store_tempsense(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, stat; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret <= 0) { + ipath_dev_err(dd, "attempt to set invalid tempsense config\n"); + goto bail; + } + /* If anything but the highest limit, enable T_CRIT_A "interrupt" */ + stat = ipath_tempsense_write(dd, 9, (val == 0x7f7f) ? 0x80 : 0); + if (stat) { + ipath_dev_err(dd, "Unable to set tempsense config\n"); + ret = -1; + goto bail; + } + stat = ipath_tempsense_write(dd, 0xB, (u8) (val & 0xFF)); + if (stat) { + ipath_dev_err(dd, "Unable to set local Tcrit\n"); + ret = -1; + goto bail; + } + stat = ipath_tempsense_write(dd, 0xD, (u8) (val >> 8)); + if (stat) { + ipath_dev_err(dd, "Unable to set remote Tcrit\n"); + ret = -1; + goto bail; + } + +bail: + return ret; +} + +/* + * dump tempsense regs. in decimal, to ease shell-scripts. + */ +static ssize_t show_tempsense(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + int idx; + u8 regvals[8]; + + ret = -ENXIO; + for (idx = 0; idx < 8; ++idx) { + if (idx == 6) + continue; + ret = ipath_tempsense_read(dd, idx); + if (ret < 0) + break; + regvals[idx] = ret; + } + if (idx == 8) + ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n", + *(signed char *)(regvals), + *(signed char *)(regvals + 1), + regvals[2], regvals[3], + *(signed char *)(regvals + 5), + *(signed char *)(regvals + 7)); + return ret; +} + +const struct attribute_group *ipath_driver_attr_groups[] = { + &driver_attr_group, + NULL, +}; + +static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid); +static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc); +static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid); +static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state); +static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid); +static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu); +static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled); +static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL); +static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL); +static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset); +static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); +static DEVICE_ATTR(status, S_IRUGO, show_status, NULL); +static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL); +static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); +static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); +static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); +static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override); +static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL); +static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL); +static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO, + show_jint_max_packets, store_jint_max_packets); +static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO, + show_jint_idle_ticks, store_jint_idle_ticks); +static DEVICE_ATTR(tempsense, S_IWUSR | S_IRUGO, + show_tempsense, store_tempsense); + +static struct attribute *dev_attributes[] = { + &dev_attr_guid.attr, + &dev_attr_lmc.attr, + &dev_attr_lid.attr, + &dev_attr_link_state.attr, + &dev_attr_mlid.attr, + &dev_attr_mtu.attr, + &dev_attr_nguid.attr, + &dev_attr_nports.attr, + &dev_attr_serial.attr, + &dev_attr_status.attr, + &dev_attr_status_str.attr, + &dev_attr_boardversion.attr, + &dev_attr_unit.attr, + &dev_attr_enabled.attr, + &dev_attr_rx_pol_inv.attr, + &dev_attr_led_override.attr, + &dev_attr_logged_errors.attr, + &dev_attr_tempsense.attr, + &dev_attr_localbus_info.attr, + NULL +}; + +static struct attribute_group dev_attr_group = { + .attrs = dev_attributes +}; + +static DEVICE_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb, + store_hrtbt_enb); +static DEVICE_ATTR(link_width_enable, S_IWUSR | S_IRUGO, show_lwid_enb, + store_lwid_enb); +static DEVICE_ATTR(link_width, S_IRUGO, show_lwid, NULL); +static DEVICE_ATTR(link_speed_enable, S_IWUSR | S_IRUGO, show_spd_enb, + store_spd_enb); +static DEVICE_ATTR(link_speed, S_IRUGO, show_spd, NULL); +static DEVICE_ATTR(rx_pol_inv_enable, S_IWUSR | S_IRUGO, show_rx_polinv_enb, + store_rx_polinv_enb); +static DEVICE_ATTR(rx_lane_rev_enable, S_IWUSR | S_IRUGO, show_lanerev_enb, + store_lanerev_enb); + +static struct attribute *dev_ibcfg_attributes[] = { + &dev_attr_hrtbt_enable.attr, + &dev_attr_link_width_enable.attr, + &dev_attr_link_width.attr, + &dev_attr_link_speed_enable.attr, + &dev_attr_link_speed.attr, + &dev_attr_rx_pol_inv_enable.attr, + &dev_attr_rx_lane_rev_enable.attr, + NULL +}; + +static struct attribute_group dev_ibcfg_attr_group = { + .attrs = dev_ibcfg_attributes +}; + +/** + * ipath_expose_reset - create a device reset file + * @dev: the device structure + * + * Only expose a file that lets us reset the device after someone + * enters diag mode. A device reset is quite likely to crash the + * machine entirely, so we don't want to normally make it + * available. + * + * Called with ipath_mutex held. + */ +int ipath_expose_reset(struct device *dev) +{ + static int exposed; + int ret; + + if (!exposed) { + ret = device_create_file(dev, &dev_attr_reset); + exposed = 1; + } + else + ret = 0; + + return ret; +} + +int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd) +{ + int ret; + + ret = sysfs_create_group(&dev->kobj, &dev_attr_group); + if (ret) + goto bail; + + ret = sysfs_create_group(&dev->kobj, &dev_counter_attr_group); + if (ret) + goto bail_attrs; + + if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) { + ret = device_create_file(dev, &dev_attr_jint_idle_ticks); + if (ret) + goto bail_counter; + ret = device_create_file(dev, &dev_attr_jint_max_packets); + if (ret) + goto bail_idle; + + ret = sysfs_create_group(&dev->kobj, &dev_ibcfg_attr_group); + if (ret) + goto bail_max; + } + + return 0; + +bail_max: + device_remove_file(dev, &dev_attr_jint_max_packets); +bail_idle: + device_remove_file(dev, &dev_attr_jint_idle_ticks); +bail_counter: + sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); +bail_attrs: + sysfs_remove_group(&dev->kobj, &dev_attr_group); +bail: + return ret; +} + +void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd) +{ + sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); + + if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) { + sysfs_remove_group(&dev->kobj, &dev_ibcfg_attr_group); + device_remove_file(dev, &dev_attr_jint_idle_ticks); + device_remove_file(dev, &dev_attr_jint_max_packets); + } + + sysfs_remove_group(&dev->kobj, &dev_attr_group); + + device_remove_file(dev, &dev_attr_reset); +} diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c new file mode 100644 index 00000000..22e60998 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -0,0 +1,547 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ipath_verbs.h" +#include "ipath_kernel.h" + +/* cut down ridiculously long IB macro names */ +#define OP(x) IB_OPCODE_UC_##x + +/** + * ipath_make_uc_req - construct a request packet (SEND, RDMA write) + * @qp: a pointer to the QP + * + * Return 1 if constructed; otherwise, return 0. + */ +int ipath_make_uc_req(struct ipath_qp *qp) +{ + struct ipath_other_headers *ohdr; + struct ipath_swqe *wqe; + unsigned long flags; + u32 hwords; + u32 bth0; + u32 len; + u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + int ret = 0; + + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } + + ohdr = &qp->s_hdr.u.oth; + if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + ohdr = &qp->s_hdr.u.l.oth; + + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ + hwords = 5; + bth0 = 1 << 22; /* Set M bit */ + + /* Get the next send request. */ + wqe = get_swqe_ptr(qp, qp->s_cur); + qp->s_wqe = NULL; + switch (qp->s_state) { + default: + if (!(ib_ipath_state_ops[qp->state] & + IPATH_PROCESS_NEXT_SEND_OK)) + goto bail; + /* Check if send work queue is empty. */ + if (qp->s_cur == qp->s_head) + goto bail; + /* + * Start a new request. + */ + qp->s_psn = wqe->psn = qp->s_next_psn; + qp->s_sge.sge = wqe->sg_list[0]; + qp->s_sge.sg_list = wqe->sg_list + 1; + qp->s_sge.num_sge = wqe->wr.num_sge; + qp->s_len = len = wqe->length; + switch (wqe->wr.opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + if (len > pmtu) { + qp->s_state = OP(SEND_FIRST); + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_SEND) + qp->s_state = OP(SEND_ONLY); + else { + qp->s_state = + OP(SEND_ONLY_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + } + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= 1 << 23; + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + ohdr->u.rc.reth.vaddr = + cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + ohdr->u.rc.reth.rkey = + cpu_to_be32(wqe->wr.wr.rdma.rkey); + ohdr->u.rc.reth.length = cpu_to_be32(len); + hwords += sizeof(struct ib_reth) / 4; + if (len > pmtu) { + qp->s_state = OP(RDMA_WRITE_FIRST); + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + qp->s_state = OP(RDMA_WRITE_ONLY); + else { + qp->s_state = + OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); + /* Immediate data comes after the RETH */ + ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= 1 << 23; + } + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + + default: + goto bail; + } + break; + + case OP(SEND_FIRST): + qp->s_state = OP(SEND_MIDDLE); + /* FALLTHROUGH */ + case OP(SEND_MIDDLE): + len = qp->s_len; + if (len > pmtu) { + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_SEND) + qp->s_state = OP(SEND_LAST); + else { + qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + } + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= 1 << 23; + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + + case OP(RDMA_WRITE_FIRST): + qp->s_state = OP(RDMA_WRITE_MIDDLE); + /* FALLTHROUGH */ + case OP(RDMA_WRITE_MIDDLE): + len = qp->s_len; + if (len > pmtu) { + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + qp->s_state = OP(RDMA_WRITE_LAST); + else { + qp->s_state = + OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= 1 << 23; + } + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + } + qp->s_len -= len; + qp->s_hdrwords = hwords; + qp->s_cur_sge = &qp->s_sge; + qp->s_cur_size = len; + ipath_make_ruc_header(to_idev(qp->ibqp.device), + qp, ohdr, bth0 | (qp->s_state << 24), + qp->s_next_psn++ & IPATH_PSN_MASK); +done: + ret = 1; + goto unlock; + +bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); + return ret; +} + +/** + * ipath_uc_rcv - handle an incoming UC packet + * @dev: the device the packet came in on + * @hdr: the header of the packet + * @has_grh: true if the packet has a GRH + * @data: the packet data + * @tlen: the length of the packet + * @qp: the QP for this packet. + * + * This is called from ipath_qp_rcv() to process an incoming UC packet + * for the given QP. + * Called at interrupt level. + */ +void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct ipath_qp *qp) +{ + struct ipath_other_headers *ohdr; + int opcode; + u32 hdrsize; + u32 psn; + u32 pad; + struct ib_wc wc; + u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + struct ib_reth *reth; + int header_in_data; + + /* Validate the SLID. See Ch. 9.6.1.5 */ + if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) + goto done; + + /* Check for GRH */ + if (!has_grh) { + ohdr = &hdr->u.oth; + hdrsize = 8 + 12; /* LRH + BTH */ + psn = be32_to_cpu(ohdr->bth[2]); + header_in_data = 0; + } else { + ohdr = &hdr->u.l.oth; + hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ + /* + * The header with GRH is 60 bytes and the + * core driver sets the eager header buffer + * size to 56 bytes so the last 4 bytes of + * the BTH header (PSN) is in the data buffer. + */ + header_in_data = dev->dd->ipath_rcvhdrentsize == 16; + if (header_in_data) { + psn = be32_to_cpu(((__be32 *) data)[0]); + data += sizeof(__be32); + } else + psn = be32_to_cpu(ohdr->bth[2]); + } + /* + * The opcode is in the low byte when its in network order + * (top byte when in host order). + */ + opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + + memset(&wc, 0, sizeof wc); + + /* Compare the PSN verses the expected PSN. */ + if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) { + /* + * Handle a sequence error. + * Silently drop any current message. + */ + qp->r_psn = psn; + inv: + qp->r_state = OP(SEND_LAST); + switch (opcode) { + case OP(SEND_FIRST): + case OP(SEND_ONLY): + case OP(SEND_ONLY_WITH_IMMEDIATE): + goto send_first; + + case OP(RDMA_WRITE_FIRST): + case OP(RDMA_WRITE_ONLY): + case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): + goto rdma_first; + + default: + dev->n_pkt_drops++; + goto done; + } + } + + /* Check for opcode sequence errors. */ + switch (qp->r_state) { + case OP(SEND_FIRST): + case OP(SEND_MIDDLE): + if (opcode == OP(SEND_MIDDLE) || + opcode == OP(SEND_LAST) || + opcode == OP(SEND_LAST_WITH_IMMEDIATE)) + break; + goto inv; + + case OP(RDMA_WRITE_FIRST): + case OP(RDMA_WRITE_MIDDLE): + if (opcode == OP(RDMA_WRITE_MIDDLE) || + opcode == OP(RDMA_WRITE_LAST) || + opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) + break; + goto inv; + + default: + if (opcode == OP(SEND_FIRST) || + opcode == OP(SEND_ONLY) || + opcode == OP(SEND_ONLY_WITH_IMMEDIATE) || + opcode == OP(RDMA_WRITE_FIRST) || + opcode == OP(RDMA_WRITE_ONLY) || + opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) + break; + goto inv; + } + + /* OK, process the packet. */ + switch (opcode) { + case OP(SEND_FIRST): + case OP(SEND_ONLY): + case OP(SEND_ONLY_WITH_IMMEDIATE): + send_first: + if (qp->r_flags & IPATH_R_REUSE_SGE) { + qp->r_flags &= ~IPATH_R_REUSE_SGE; + qp->r_sge = qp->s_rdma_read_sge; + } else if (!ipath_get_rwqe(qp, 0)) { + dev->n_pkt_drops++; + goto done; + } + /* Save the WQE so we can reuse it in case of an error. */ + qp->s_rdma_read_sge = qp->r_sge; + qp->r_rcv_len = 0; + if (opcode == OP(SEND_ONLY)) + goto send_last; + else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) + goto send_last_imm; + /* FALLTHROUGH */ + case OP(SEND_MIDDLE): + /* Check for invalid length PMTU or posted rwqe len. */ + if (unlikely(tlen != (hdrsize + pmtu + 4))) { + qp->r_flags |= IPATH_R_REUSE_SGE; + dev->n_pkt_drops++; + goto done; + } + qp->r_rcv_len += pmtu; + if (unlikely(qp->r_rcv_len > qp->r_len)) { + qp->r_flags |= IPATH_R_REUSE_SGE; + dev->n_pkt_drops++; + goto done; + } + ipath_copy_sge(&qp->r_sge, data, pmtu); + break; + + case OP(SEND_LAST_WITH_IMMEDIATE): + send_last_imm: + if (header_in_data) { + wc.ex.imm_data = *(__be32 *) data; + data += sizeof(__be32); + } else { + /* Immediate data comes after BTH */ + wc.ex.imm_data = ohdr->u.imm_data; + } + hdrsize += 4; + wc.wc_flags = IB_WC_WITH_IMM; + /* FALLTHROUGH */ + case OP(SEND_LAST): + send_last: + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* Check for invalid length. */ + /* XXX LAST len should be >= 1 */ + if (unlikely(tlen < (hdrsize + pad + 4))) { + qp->r_flags |= IPATH_R_REUSE_SGE; + dev->n_pkt_drops++; + goto done; + } + /* Don't count the CRC. */ + tlen -= (hdrsize + pad + 4); + wc.byte_len = tlen + qp->r_rcv_len; + if (unlikely(wc.byte_len > qp->r_len)) { + qp->r_flags |= IPATH_R_REUSE_SGE; + dev->n_pkt_drops++; + goto done; + } + wc.opcode = IB_WC_RECV; + last_imm: + ipath_copy_sge(&qp->r_sge, data, tlen); + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; + wc.qp = &qp->ibqp; + wc.src_qp = qp->remote_qpn; + wc.slid = qp->remote_ah_attr.dlid; + wc.sl = qp->remote_ah_attr.sl; + /* Signal completion event if the solicited bit is set. */ + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + (ohdr->bth[0] & + cpu_to_be32(1 << 23)) != 0); + break; + + case OP(RDMA_WRITE_FIRST): + case OP(RDMA_WRITE_ONLY): + case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */ + rdma_first: + /* RETH comes after BTH */ + if (!header_in_data) + reth = &ohdr->u.rc.reth; + else { + reth = (struct ib_reth *)data; + data += sizeof(*reth); + } + hdrsize += sizeof(*reth); + qp->r_len = be32_to_cpu(reth->length); + qp->r_rcv_len = 0; + if (qp->r_len != 0) { + u32 rkey = be32_to_cpu(reth->rkey); + u64 vaddr = be64_to_cpu(reth->vaddr); + int ok; + + /* Check rkey */ + ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len, + vaddr, rkey, + IB_ACCESS_REMOTE_WRITE); + if (unlikely(!ok)) { + dev->n_pkt_drops++; + goto done; + } + } else { + qp->r_sge.sg_list = NULL; + qp->r_sge.sge.mr = NULL; + qp->r_sge.sge.vaddr = NULL; + qp->r_sge.sge.length = 0; + qp->r_sge.sge.sge_length = 0; + } + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_WRITE))) { + dev->n_pkt_drops++; + goto done; + } + if (opcode == OP(RDMA_WRITE_ONLY)) + goto rdma_last; + else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) + goto rdma_last_imm; + /* FALLTHROUGH */ + case OP(RDMA_WRITE_MIDDLE): + /* Check for invalid length PMTU or posted rwqe len. */ + if (unlikely(tlen != (hdrsize + pmtu + 4))) { + dev->n_pkt_drops++; + goto done; + } + qp->r_rcv_len += pmtu; + if (unlikely(qp->r_rcv_len > qp->r_len)) { + dev->n_pkt_drops++; + goto done; + } + ipath_copy_sge(&qp->r_sge, data, pmtu); + break; + + case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): + rdma_last_imm: + if (header_in_data) { + wc.ex.imm_data = *(__be32 *) data; + data += sizeof(__be32); + } else { + /* Immediate data comes after BTH */ + wc.ex.imm_data = ohdr->u.imm_data; + } + hdrsize += 4; + wc.wc_flags = IB_WC_WITH_IMM; + + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* Check for invalid length. */ + /* XXX LAST len should be >= 1 */ + if (unlikely(tlen < (hdrsize + pad + 4))) { + dev->n_pkt_drops++; + goto done; + } + /* Don't count the CRC. */ + tlen -= (hdrsize + pad + 4); + if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) { + dev->n_pkt_drops++; + goto done; + } + if (qp->r_flags & IPATH_R_REUSE_SGE) + qp->r_flags &= ~IPATH_R_REUSE_SGE; + else if (!ipath_get_rwqe(qp, 1)) { + dev->n_pkt_drops++; + goto done; + } + wc.byte_len = qp->r_len; + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + goto last_imm; + + case OP(RDMA_WRITE_LAST): + rdma_last: + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* Check for invalid length. */ + /* XXX LAST len should be >= 1 */ + if (unlikely(tlen < (hdrsize + pad + 4))) { + dev->n_pkt_drops++; + goto done; + } + /* Don't count the CRC. */ + tlen -= (hdrsize + pad + 4); + if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) { + dev->n_pkt_drops++; + goto done; + } + ipath_copy_sge(&qp->r_sge, data, tlen); + break; + + default: + /* Drop packet for unknown opcodes. */ + dev->n_pkt_drops++; + goto done; + } + qp->r_psn++; + qp->r_state = opcode; +done: + return; +} diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c new file mode 100644 index 00000000..e8a2a915 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -0,0 +1,580 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "ipath_verbs.h" +#include "ipath_kernel.h" + +/** + * ipath_ud_loopback - handle send on loopback QPs + * @sqp: the sending QP + * @swqe: the send work request + * + * This is called from ipath_make_ud_req() to forward a WQE addressed + * to the same HCA. + * Note that the receive interrupt handler may be calling ipath_ud_rcv() + * while this is being called. + */ +static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) +{ + struct ipath_ibdev *dev = to_idev(sqp->ibqp.device); + struct ipath_qp *qp; + struct ib_ah_attr *ah_attr; + unsigned long flags; + struct ipath_rq *rq; + struct ipath_srq *srq; + struct ipath_sge_state rsge; + struct ipath_sge *sge; + struct ipath_rwq *wq; + struct ipath_rwqe *wqe; + void (*handler)(struct ib_event *, void *); + struct ib_wc wc; + u32 tail; + u32 rlen; + u32 length; + + qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn); + if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { + dev->n_pkt_drops++; + goto done; + } + + /* + * Check that the qkey matches (except for QP0, see 9.6.1.4.1). + * Qkeys with the high order bit set mean use the + * qkey from the QP context instead of the WR (see 10.2.5). + */ + if (unlikely(qp->ibqp.qp_num && + ((int) swqe->wr.wr.ud.remote_qkey < 0 ? + sqp->qkey : swqe->wr.wr.ud.remote_qkey) != qp->qkey)) { + /* XXX OK to lose a count once in a while. */ + dev->qkey_violations++; + dev->n_pkt_drops++; + goto drop; + } + + /* + * A GRH is expected to precede the data even if not + * present on the wire. + */ + length = swqe->length; + memset(&wc, 0, sizeof wc); + wc.byte_len = length + sizeof(struct ib_grh); + + if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { + wc.wc_flags = IB_WC_WITH_IMM; + wc.ex.imm_data = swqe->wr.ex.imm_data; + } + + /* + * This would be a lot simpler if we could call ipath_get_rwqe() + * but that uses state that the receive interrupt handler uses + * so we would need to lock out receive interrupts while doing + * local loopback. + */ + if (qp->ibqp.srq) { + srq = to_isrq(qp->ibqp.srq); + handler = srq->ibsrq.event_handler; + rq = &srq->rq; + } else { + srq = NULL; + handler = NULL; + rq = &qp->r_rq; + } + + /* + * Get the next work request entry to find where to put the data. + * Note that it is safe to drop the lock after changing rq->tail + * since ipath_post_receive() won't fill the empty slot. + */ + spin_lock_irqsave(&rq->lock, flags); + wq = rq->wq; + tail = wq->tail; + /* Validate tail before using it since it is user writable. */ + if (tail >= rq->size) + tail = 0; + if (unlikely(tail == wq->head)) { + spin_unlock_irqrestore(&rq->lock, flags); + dev->n_pkt_drops++; + goto drop; + } + wqe = get_rwqe_ptr(rq, tail); + rsge.sg_list = qp->r_ud_sg_list; + if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) { + spin_unlock_irqrestore(&rq->lock, flags); + dev->n_pkt_drops++; + goto drop; + } + /* Silently drop packets which are too big. */ + if (wc.byte_len > rlen) { + spin_unlock_irqrestore(&rq->lock, flags); + dev->n_pkt_drops++; + goto drop; + } + if (++tail >= rq->size) + tail = 0; + wq->tail = tail; + wc.wr_id = wqe->wr_id; + if (handler) { + u32 n; + + /* + * validate head pointer value and compute + * the number of remaining WQEs. + */ + n = wq->head; + if (n >= rq->size) + n = 0; + if (n < tail) + n += rq->size - tail; + else + n -= tail; + if (n < srq->limit) { + struct ib_event ev; + + srq->limit = 0; + spin_unlock_irqrestore(&rq->lock, flags); + ev.device = qp->ibqp.device; + ev.element.srq = qp->ibqp.srq; + ev.event = IB_EVENT_SRQ_LIMIT_REACHED; + handler(&ev, srq->ibsrq.srq_context); + } else + spin_unlock_irqrestore(&rq->lock, flags); + } else + spin_unlock_irqrestore(&rq->lock, flags); + + ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; + if (ah_attr->ah_flags & IB_AH_GRH) { + ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh)); + wc.wc_flags |= IB_WC_GRH; + } else + ipath_skip_sge(&rsge, sizeof(struct ib_grh)); + sge = swqe->sg_list; + while (length) { + u32 len = sge->length; + + if (len > length) + len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + ipath_copy_sge(&rsge, sge->vaddr, len); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (--swqe->wr.num_sge) + sge++; + } else if (sge->length == 0 && sge->mr != NULL) { + if (++sge->n >= IPATH_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + length -= len; + } + wc.status = IB_WC_SUCCESS; + wc.opcode = IB_WC_RECV; + wc.qp = &qp->ibqp; + wc.src_qp = sqp->ibqp.qp_num; + /* XXX do we know which pkey matched? Only needed for GSI. */ + wc.pkey_index = 0; + wc.slid = dev->dd->ipath_lid | + (ah_attr->src_path_bits & + ((1 << dev->dd->ipath_lmc) - 1)); + wc.sl = ah_attr->sl; + wc.dlid_path_bits = + ah_attr->dlid & ((1 << dev->dd->ipath_lmc) - 1); + wc.port_num = 1; + /* Signal completion event if the solicited bit is set. */ + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + swqe->wr.send_flags & IB_SEND_SOLICITED); +drop: + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); +done:; +} + +/** + * ipath_make_ud_req - construct a UD request packet + * @qp: the QP + * + * Return 1 if constructed; otherwise, return 0. + */ +int ipath_make_ud_req(struct ipath_qp *qp) +{ + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + struct ipath_other_headers *ohdr; + struct ib_ah_attr *ah_attr; + struct ipath_swqe *wqe; + unsigned long flags; + u32 nwords; + u32 extra_bytes; + u32 bth0; + u16 lrh0; + u16 lid; + int ret = 0; + int next_cur; + + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } + + if (qp->s_cur == qp->s_head) + goto bail; + + wqe = get_swqe_ptr(qp, qp->s_cur); + next_cur = qp->s_cur + 1; + if (next_cur >= qp->s_size) + next_cur = 0; + + /* Construct the header. */ + ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; + if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) { + if (ah_attr->dlid != IPATH_PERMISSIVE_LID) + dev->n_multicast_xmit++; + else + dev->n_unicast_xmit++; + } else { + dev->n_unicast_xmit++; + lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1); + if (unlikely(lid == dev->dd->ipath_lid)) { + /* + * If DMAs are in progress, we can't generate + * a completion for the loopback packet since + * it would be out of order. + * XXX Instead of waiting, we could queue a + * zero length descriptor so we get a callback. + */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + qp->s_cur = next_cur; + spin_unlock_irqrestore(&qp->s_lock, flags); + ipath_ud_loopback(qp, wqe); + spin_lock_irqsave(&qp->s_lock, flags); + ipath_send_complete(qp, wqe, IB_WC_SUCCESS); + goto done; + } + } + + qp->s_cur = next_cur; + extra_bytes = -wqe->length & 3; + nwords = (wqe->length + extra_bytes) >> 2; + + /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */ + qp->s_hdrwords = 7; + qp->s_cur_size = wqe->length; + qp->s_cur_sge = &qp->s_sge; + qp->s_dmult = ah_attr->static_rate; + qp->s_wqe = wqe; + qp->s_sge.sge = wqe->sg_list[0]; + qp->s_sge.sg_list = wqe->sg_list + 1; + qp->s_sge.num_sge = wqe->wr.num_sge; + + if (ah_attr->ah_flags & IB_AH_GRH) { + /* Header size in 32-bit words. */ + qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh, + &ah_attr->grh, + qp->s_hdrwords, nwords); + lrh0 = IPATH_LRH_GRH; + ohdr = &qp->s_hdr.u.l.oth; + /* + * Don't worry about sending to locally attached multicast + * QPs. It is unspecified by the spec. what happens. + */ + } else { + /* Header size in 32-bit words. */ + lrh0 = IPATH_LRH_BTH; + ohdr = &qp->s_hdr.u.oth; + } + if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { + qp->s_hdrwords++; + ohdr->u.ud.imm_data = wqe->wr.ex.imm_data; + bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; + } else + bth0 = IB_OPCODE_UD_SEND_ONLY << 24; + lrh0 |= ah_attr->sl << 4; + if (qp->ibqp.qp_type == IB_QPT_SMI) + lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ + qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); + qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ + qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + + SIZE_OF_CRC); + lid = dev->dd->ipath_lid; + if (lid) { + lid |= ah_attr->src_path_bits & + ((1 << dev->dd->ipath_lmc) - 1); + qp->s_hdr.lrh[3] = cpu_to_be16(lid); + } else + qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE; + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= 1 << 23; + bth0 |= extra_bytes << 20; + bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY : + ipath_get_pkey(dev->dd, qp->s_pkey_index); + ohdr->bth[0] = cpu_to_be32(bth0); + /* + * Use the multicast QP if the destination LID is a multicast LID. + */ + ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && + ah_attr->dlid != IPATH_PERMISSIVE_LID ? + cpu_to_be32(IPATH_MULTICAST_QPN) : + cpu_to_be32(wqe->wr.wr.ud.remote_qpn); + ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK); + /* + * Qkeys with the high order bit set mean use the + * qkey from the QP context instead of the WR (see 10.2.5). + */ + ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ? + qp->qkey : wqe->wr.wr.ud.remote_qkey); + ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); + +done: + ret = 1; + goto unlock; + +bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); + return ret; +} + +/** + * ipath_ud_rcv - receive an incoming UD packet + * @dev: the device the packet came in on + * @hdr: the packet header + * @has_grh: true if the packet has a GRH + * @data: the packet data + * @tlen: the packet length + * @qp: the QP the packet came on + * + * This is called from ipath_qp_rcv() to process an incoming UD packet + * for the given QP. + * Called at interrupt level. + */ +void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct ipath_qp *qp) +{ + struct ipath_other_headers *ohdr; + int opcode; + u32 hdrsize; + u32 pad; + struct ib_wc wc; + u32 qkey; + u32 src_qp; + u16 dlid; + int header_in_data; + + /* Check for GRH */ + if (!has_grh) { + ohdr = &hdr->u.oth; + hdrsize = 8 + 12 + 8; /* LRH + BTH + DETH */ + qkey = be32_to_cpu(ohdr->u.ud.deth[0]); + src_qp = be32_to_cpu(ohdr->u.ud.deth[1]); + header_in_data = 0; + } else { + ohdr = &hdr->u.l.oth; + hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */ + /* + * The header with GRH is 68 bytes and the core driver sets + * the eager header buffer size to 56 bytes so the last 12 + * bytes of the IB header is in the data buffer. + */ + header_in_data = dev->dd->ipath_rcvhdrentsize == 16; + if (header_in_data) { + qkey = be32_to_cpu(((__be32 *) data)[1]); + src_qp = be32_to_cpu(((__be32 *) data)[2]); + data += 12; + } else { + qkey = be32_to_cpu(ohdr->u.ud.deth[0]); + src_qp = be32_to_cpu(ohdr->u.ud.deth[1]); + } + } + src_qp &= IPATH_QPN_MASK; + + /* + * Check that the permissive LID is only used on QP0 + * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1). + */ + if (qp->ibqp.qp_num) { + if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE || + hdr->lrh[3] == IB_LID_PERMISSIVE)) { + dev->n_pkt_drops++; + goto bail; + } + if (unlikely(qkey != qp->qkey)) { + /* XXX OK to lose a count once in a while. */ + dev->qkey_violations++; + dev->n_pkt_drops++; + goto bail; + } + } else if (hdr->lrh[1] == IB_LID_PERMISSIVE || + hdr->lrh[3] == IB_LID_PERMISSIVE) { + struct ib_smp *smp = (struct ib_smp *) data; + + if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + dev->n_pkt_drops++; + goto bail; + } + } + + /* + * The opcode is in the low byte when its in network order + * (top byte when in host order). + */ + opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + if (qp->ibqp.qp_num > 1 && + opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { + if (header_in_data) { + wc.ex.imm_data = *(__be32 *) data; + data += sizeof(__be32); + } else + wc.ex.imm_data = ohdr->u.ud.imm_data; + wc.wc_flags = IB_WC_WITH_IMM; + hdrsize += sizeof(u32); + } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { + wc.ex.imm_data = 0; + wc.wc_flags = 0; + } else { + dev->n_pkt_drops++; + goto bail; + } + + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + if (unlikely(tlen < (hdrsize + pad + 4))) { + /* Drop incomplete packets. */ + dev->n_pkt_drops++; + goto bail; + } + tlen -= hdrsize + pad + 4; + + /* Drop invalid MAD packets (see 13.5.3.1). */ + if (unlikely((qp->ibqp.qp_num == 0 && + (tlen != 256 || + (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)) || + (qp->ibqp.qp_num == 1 && + (tlen != 256 || + (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))) { + dev->n_pkt_drops++; + goto bail; + } + + /* + * A GRH is expected to precede the data even if not + * present on the wire. + */ + wc.byte_len = tlen + sizeof(struct ib_grh); + + /* + * Get the next work request entry to find where to put the data. + */ + if (qp->r_flags & IPATH_R_REUSE_SGE) + qp->r_flags &= ~IPATH_R_REUSE_SGE; + else if (!ipath_get_rwqe(qp, 0)) { + /* + * Count VL15 packets dropped due to no receive buffer. + * Otherwise, count them as buffer overruns since usually, + * the HW will be able to receive packets even if there are + * no QPs with posted receive buffers. + */ + if (qp->ibqp.qp_num == 0) + dev->n_vl15_dropped++; + else + dev->rcv_errors++; + goto bail; + } + /* Silently drop packets which are too big. */ + if (wc.byte_len > qp->r_len) { + qp->r_flags |= IPATH_R_REUSE_SGE; + dev->n_pkt_drops++; + goto bail; + } + if (has_grh) { + ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh, + sizeof(struct ib_grh)); + wc.wc_flags |= IB_WC_GRH; + } else + ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); + ipath_copy_sge(&qp->r_sge, data, + wc.byte_len - sizeof(struct ib_grh)); + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) + goto bail; + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; + wc.opcode = IB_WC_RECV; + wc.vendor_err = 0; + wc.qp = &qp->ibqp; + wc.src_qp = src_qp; + /* XXX do we know which pkey matched? Only needed for GSI. */ + wc.pkey_index = 0; + wc.slid = be16_to_cpu(hdr->lrh[3]); + wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF; + dlid = be16_to_cpu(hdr->lrh[1]); + /* + * Save the LMC lower bits if the destination LID is a unicast LID. + */ + wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 : + dlid & ((1 << dev->dd->ipath_lmc) - 1); + wc.port_num = 1; + /* Signal completion event if the solicited bit is set. */ + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + (ohdr->bth[0] & + cpu_to_be32(1 << 23)) != 0); + +bail:; +} diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c new file mode 100644 index 00000000..dc66c450 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include + +#include "ipath_kernel.h" + +static void __ipath_release_user_pages(struct page **p, size_t num_pages, + int dirty) +{ + size_t i; + + for (i = 0; i < num_pages; i++) { + ipath_cdbg(MM, "%lu/%lu put_page %p\n", (unsigned long) i, + (unsigned long) num_pages, p[i]); + if (dirty) + set_page_dirty_lock(p[i]); + put_page(p[i]); + } +} + +/* call with current->mm->mmap_sem held */ +static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages, + struct page **p, struct vm_area_struct **vma) +{ + unsigned long lock_limit; + size_t got; + int ret; + + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + if (num_pages > lock_limit) { + ret = -ENOMEM; + goto bail; + } + + ipath_cdbg(VERBOSE, "pin %lx pages from vaddr %lx\n", + (unsigned long) num_pages, start_page); + + for (got = 0; got < num_pages; got += ret) { + ret = get_user_pages(current, current->mm, + start_page + got * PAGE_SIZE, + num_pages - got, 1, 1, + p + got, vma); + if (ret < 0) + goto bail_release; + } + + current->mm->pinned_vm += num_pages; + + ret = 0; + goto bail; + +bail_release: + __ipath_release_user_pages(p, got, 0); +bail: + return ret; +} + +/** + * ipath_map_page - a safety wrapper around pci_map_page() + * + * A dma_addr of all 0's is interpreted by the chip as "disabled". + * Unfortunately, it can also be a valid dma_addr returned on some + * architectures. + * + * The powerpc iommu assigns dma_addrs in ascending order, so we don't + * have to bother with retries or mapping a dummy page to insure we + * don't just get the same mapping again. + * + * I'm sure we won't be so lucky with other iommu's, so FIXME. + */ +dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page, + unsigned long offset, size_t size, int direction) +{ + dma_addr_t phys; + + phys = pci_map_page(hwdev, page, offset, size, direction); + + if (phys == 0) { + pci_unmap_page(hwdev, phys, size, direction); + phys = pci_map_page(hwdev, page, offset, size, direction); + /* + * FIXME: If we get 0 again, we should keep this page, + * map another, then free the 0 page. + */ + } + + return phys; +} + +/** + * ipath_map_single - a safety wrapper around pci_map_single() + * + * Same idea as ipath_map_page(). + */ +dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size, + int direction) +{ + dma_addr_t phys; + + phys = pci_map_single(hwdev, ptr, size, direction); + + if (phys == 0) { + pci_unmap_single(hwdev, phys, size, direction); + phys = pci_map_single(hwdev, ptr, size, direction); + /* + * FIXME: If we get 0 again, we should keep this page, + * map another, then free the 0 page. + */ + } + + return phys; +} + +/** + * ipath_get_user_pages - lock user pages into memory + * @start_page: the start page + * @num_pages: the number of pages + * @p: the output page structures + * + * This function takes a given start page (page aligned user virtual + * address) and pins it and the following specified number of pages. For + * now, num_pages is always 1, but that will probably change at some point + * (because caller is doing expected sends on a single virtually contiguous + * buffer, so we can do all pages at once). + */ +int ipath_get_user_pages(unsigned long start_page, size_t num_pages, + struct page **p) +{ + int ret; + + down_write(¤t->mm->mmap_sem); + + ret = __ipath_get_user_pages(start_page, num_pages, p, NULL); + + up_write(¤t->mm->mmap_sem); + + return ret; +} + +void ipath_release_user_pages(struct page **p, size_t num_pages) +{ + down_write(¤t->mm->mmap_sem); + + __ipath_release_user_pages(p, num_pages, 1); + + current->mm->pinned_vm -= num_pages; + + up_write(¤t->mm->mmap_sem); +} + +struct ipath_user_pages_work { + struct work_struct work; + struct mm_struct *mm; + unsigned long num_pages; +}; + +static void user_pages_account(struct work_struct *_work) +{ + struct ipath_user_pages_work *work = + container_of(_work, struct ipath_user_pages_work, work); + + down_write(&work->mm->mmap_sem); + work->mm->pinned_vm -= work->num_pages; + up_write(&work->mm->mmap_sem); + mmput(work->mm); + kfree(work); +} + +void ipath_release_user_pages_on_close(struct page **p, size_t num_pages) +{ + struct ipath_user_pages_work *work; + struct mm_struct *mm; + + __ipath_release_user_pages(p, num_pages, 1); + + mm = get_task_mm(current); + if (!mm) + return; + + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) + goto bail_mm; + + INIT_WORK(&work->work, user_pages_account); + work->mm = mm; + work->num_pages = num_pages; + + queue_work(ib_wq, &work->work); + return; + +bail_mm: + mmput(mm); + return; +} diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c new file mode 100644 index 00000000..f5cb13b2 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c @@ -0,0 +1,880 @@ +/* + * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ipath_kernel.h" +#include "ipath_user_sdma.h" + +/* minimum size of header */ +#define IPATH_USER_SDMA_MIN_HEADER_LENGTH 64 +/* expected size of headers (for dma_pool) */ +#define IPATH_USER_SDMA_EXP_HEADER_LENGTH 64 +/* length mask in PBC (lower 11 bits) */ +#define IPATH_PBC_LENGTH_MASK ((1 << 11) - 1) + +struct ipath_user_sdma_pkt { + u8 naddr; /* dimension of addr (1..3) ... */ + u32 counter; /* sdma pkts queued counter for this entry */ + u64 added; /* global descq number of entries */ + + struct { + u32 offset; /* offset for kvaddr, addr */ + u32 length; /* length in page */ + u8 put_page; /* should we put_page? */ + u8 dma_mapped; /* is page dma_mapped? */ + struct page *page; /* may be NULL (coherent mem) */ + void *kvaddr; /* FIXME: only for pio hack */ + dma_addr_t addr; + } addr[4]; /* max pages, any more and we coalesce */ + struct list_head list; /* list element */ +}; + +struct ipath_user_sdma_queue { + /* + * pkts sent to dma engine are queued on this + * list head. the type of the elements of this + * list are struct ipath_user_sdma_pkt... + */ + struct list_head sent; + + /* headers with expected length are allocated from here... */ + char header_cache_name[64]; + struct dma_pool *header_cache; + + /* packets are allocated from the slab cache... */ + char pkt_slab_name[64]; + struct kmem_cache *pkt_slab; + + /* as packets go on the queued queue, they are counted... */ + u32 counter; + u32 sent_counter; + + /* dma page table */ + struct rb_root dma_pages_root; + + /* protect everything above... */ + struct mutex lock; +}; + +struct ipath_user_sdma_queue * +ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport) +{ + struct ipath_user_sdma_queue *pq = + kmalloc(sizeof(struct ipath_user_sdma_queue), GFP_KERNEL); + + if (!pq) + goto done; + + pq->counter = 0; + pq->sent_counter = 0; + INIT_LIST_HEAD(&pq->sent); + + mutex_init(&pq->lock); + + snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name), + "ipath-user-sdma-pkts-%u-%02u.%02u", unit, port, sport); + pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name, + sizeof(struct ipath_user_sdma_pkt), + 0, 0, NULL); + + if (!pq->pkt_slab) + goto err_kfree; + + snprintf(pq->header_cache_name, sizeof(pq->header_cache_name), + "ipath-user-sdma-headers-%u-%02u.%02u", unit, port, sport); + pq->header_cache = dma_pool_create(pq->header_cache_name, + dev, + IPATH_USER_SDMA_EXP_HEADER_LENGTH, + 4, 0); + if (!pq->header_cache) + goto err_slab; + + pq->dma_pages_root = RB_ROOT; + + goto done; + +err_slab: + kmem_cache_destroy(pq->pkt_slab); +err_kfree: + kfree(pq); + pq = NULL; + +done: + return pq; +} + +static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt *pkt, + int i, size_t offset, size_t len, + int put_page, int dma_mapped, + struct page *page, + void *kvaddr, dma_addr_t dma_addr) +{ + pkt->addr[i].offset = offset; + pkt->addr[i].length = len; + pkt->addr[i].put_page = put_page; + pkt->addr[i].dma_mapped = dma_mapped; + pkt->addr[i].page = page; + pkt->addr[i].kvaddr = kvaddr; + pkt->addr[i].addr = dma_addr; +} + +static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt *pkt, + u32 counter, size_t offset, + size_t len, int dma_mapped, + struct page *page, + void *kvaddr, dma_addr_t dma_addr) +{ + pkt->naddr = 1; + pkt->counter = counter; + ipath_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page, + kvaddr, dma_addr); +} + +/* we've too many pages in the iovec, coalesce to a single page */ +static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd, + struct ipath_user_sdma_pkt *pkt, + const struct iovec *iov, + unsigned long niov) { + int ret = 0; + struct page *page = alloc_page(GFP_KERNEL); + void *mpage_save; + char *mpage; + int i; + int len = 0; + dma_addr_t dma_addr; + + if (!page) { + ret = -ENOMEM; + goto done; + } + + mpage = kmap(page); + mpage_save = mpage; + for (i = 0; i < niov; i++) { + int cfur; + + cfur = copy_from_user(mpage, + iov[i].iov_base, iov[i].iov_len); + if (cfur) { + ret = -EFAULT; + goto free_unmap; + } + + mpage += iov[i].iov_len; + len += iov[i].iov_len; + } + + dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, + DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + ret = -ENOMEM; + goto free_unmap; + } + + ipath_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save, + dma_addr); + pkt->naddr = 2; + + goto done; + +free_unmap: + kunmap(page); + __free_page(page); +done: + return ret; +} + +/* how many pages in this iovec element? */ +static int ipath_user_sdma_num_pages(const struct iovec *iov) +{ + const unsigned long addr = (unsigned long) iov->iov_base; + const unsigned long len = iov->iov_len; + const unsigned long spage = addr & PAGE_MASK; + const unsigned long epage = (addr + len - 1) & PAGE_MASK; + + return 1 + ((epage - spage) >> PAGE_SHIFT); +} + +/* truncate length to page boundary */ +static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len) +{ + const unsigned long offset = addr & ~PAGE_MASK; + + return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len; +} + +static void ipath_user_sdma_free_pkt_frag(struct device *dev, + struct ipath_user_sdma_queue *pq, + struct ipath_user_sdma_pkt *pkt, + int frag) +{ + const int i = frag; + + if (pkt->addr[i].page) { + if (pkt->addr[i].dma_mapped) + dma_unmap_page(dev, + pkt->addr[i].addr, + pkt->addr[i].length, + DMA_TO_DEVICE); + + if (pkt->addr[i].kvaddr) + kunmap(pkt->addr[i].page); + + if (pkt->addr[i].put_page) + put_page(pkt->addr[i].page); + else + __free_page(pkt->addr[i].page); + } else if (pkt->addr[i].kvaddr) + /* free coherent mem from cache... */ + dma_pool_free(pq->header_cache, + pkt->addr[i].kvaddr, pkt->addr[i].addr); +} + +/* return number of pages pinned... */ +static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd, + struct ipath_user_sdma_pkt *pkt, + unsigned long addr, int tlen, int npages) +{ + struct page *pages[2]; + int j; + int ret; + + ret = get_user_pages(current, current->mm, addr, + npages, 0, 1, pages, NULL); + + if (ret != npages) { + int i; + + for (i = 0; i < ret; i++) + put_page(pages[i]); + + ret = -ENOMEM; + goto done; + } + + for (j = 0; j < npages; j++) { + /* map the pages... */ + const int flen = + ipath_user_sdma_page_length(addr, tlen); + dma_addr_t dma_addr = + dma_map_page(&dd->pcidev->dev, + pages[j], 0, flen, DMA_TO_DEVICE); + unsigned long fofs = addr & ~PAGE_MASK; + + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + ret = -ENOMEM; + goto done; + } + + ipath_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1, + pages[j], kmap(pages[j]), + dma_addr); + + pkt->naddr++; + addr += flen; + tlen -= flen; + } + +done: + return ret; +} + +static int ipath_user_sdma_pin_pkt(const struct ipath_devdata *dd, + struct ipath_user_sdma_queue *pq, + struct ipath_user_sdma_pkt *pkt, + const struct iovec *iov, + unsigned long niov) +{ + int ret = 0; + unsigned long idx; + + for (idx = 0; idx < niov; idx++) { + const int npages = ipath_user_sdma_num_pages(iov + idx); + const unsigned long addr = (unsigned long) iov[idx].iov_base; + + ret = ipath_user_sdma_pin_pages(dd, pkt, + addr, iov[idx].iov_len, + npages); + if (ret < 0) + goto free_pkt; + } + + goto done; + +free_pkt: + for (idx = 0; idx < pkt->naddr; idx++) + ipath_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx); + +done: + return ret; +} + +static int ipath_user_sdma_init_payload(const struct ipath_devdata *dd, + struct ipath_user_sdma_queue *pq, + struct ipath_user_sdma_pkt *pkt, + const struct iovec *iov, + unsigned long niov, int npages) +{ + int ret = 0; + + if (npages >= ARRAY_SIZE(pkt->addr)) + ret = ipath_user_sdma_coalesce(dd, pkt, iov, niov); + else + ret = ipath_user_sdma_pin_pkt(dd, pq, pkt, iov, niov); + + return ret; +} + +/* free a packet list -- return counter value of last packet */ +static void ipath_user_sdma_free_pkt_list(struct device *dev, + struct ipath_user_sdma_queue *pq, + struct list_head *list) +{ + struct ipath_user_sdma_pkt *pkt, *pkt_next; + + list_for_each_entry_safe(pkt, pkt_next, list, list) { + int i; + + for (i = 0; i < pkt->naddr; i++) + ipath_user_sdma_free_pkt_frag(dev, pq, pkt, i); + + kmem_cache_free(pq->pkt_slab, pkt); + } +} + +/* + * copy headers, coalesce etc -- pq->lock must be held + * + * we queue all the packets to list, returning the + * number of bytes total. list must be empty initially, + * as, if there is an error we clean it... + */ +static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd, + struct ipath_user_sdma_queue *pq, + struct list_head *list, + const struct iovec *iov, + unsigned long niov, + int maxpkts) +{ + unsigned long idx = 0; + int ret = 0; + int npkts = 0; + struct page *page = NULL; + __le32 *pbc; + dma_addr_t dma_addr; + struct ipath_user_sdma_pkt *pkt = NULL; + size_t len; + size_t nw; + u32 counter = pq->counter; + int dma_mapped = 0; + + while (idx < niov && npkts < maxpkts) { + const unsigned long addr = (unsigned long) iov[idx].iov_base; + const unsigned long idx_save = idx; + unsigned pktnw; + unsigned pktnwc; + int nfrags = 0; + int npages = 0; + int cfur; + + dma_mapped = 0; + len = iov[idx].iov_len; + nw = len >> 2; + page = NULL; + + pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); + if (!pkt) { + ret = -ENOMEM; + goto free_list; + } + + if (len < IPATH_USER_SDMA_MIN_HEADER_LENGTH || + len > PAGE_SIZE || len & 3 || addr & 3) { + ret = -EINVAL; + goto free_pkt; + } + + if (len == IPATH_USER_SDMA_EXP_HEADER_LENGTH) + pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL, + &dma_addr); + else + pbc = NULL; + + if (!pbc) { + page = alloc_page(GFP_KERNEL); + if (!page) { + ret = -ENOMEM; + goto free_pkt; + } + pbc = kmap(page); + } + + cfur = copy_from_user(pbc, iov[idx].iov_base, len); + if (cfur) { + ret = -EFAULT; + goto free_pbc; + } + + /* + * this assignment is a bit strange. it's because the + * the pbc counts the number of 32 bit words in the full + * packet _except_ the first word of the pbc itself... + */ + pktnwc = nw - 1; + + /* + * pktnw computation yields the number of 32 bit words + * that the caller has indicated in the PBC. note that + * this is one less than the total number of words that + * goes to the send DMA engine as the first 32 bit word + * of the PBC itself is not counted. Armed with this count, + * we can verify that the packet is consistent with the + * iovec lengths. + */ + pktnw = le32_to_cpu(*pbc) & IPATH_PBC_LENGTH_MASK; + if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) { + ret = -EINVAL; + goto free_pbc; + } + + + idx++; + while (pktnwc < pktnw && idx < niov) { + const size_t slen = iov[idx].iov_len; + const unsigned long faddr = + (unsigned long) iov[idx].iov_base; + + if (slen & 3 || faddr & 3 || !slen || + slen > PAGE_SIZE) { + ret = -EINVAL; + goto free_pbc; + } + + npages++; + if ((faddr & PAGE_MASK) != + ((faddr + slen - 1) & PAGE_MASK)) + npages++; + + pktnwc += slen >> 2; + idx++; + nfrags++; + } + + if (pktnwc != pktnw) { + ret = -EINVAL; + goto free_pbc; + } + + if (page) { + dma_addr = dma_map_page(&dd->pcidev->dev, + page, 0, len, DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + ret = -ENOMEM; + goto free_pbc; + } + + dma_mapped = 1; + } + + ipath_user_sdma_init_header(pkt, counter, 0, len, dma_mapped, + page, pbc, dma_addr); + + if (nfrags) { + ret = ipath_user_sdma_init_payload(dd, pq, pkt, + iov + idx_save + 1, + nfrags, npages); + if (ret < 0) + goto free_pbc_dma; + } + + counter++; + npkts++; + + list_add_tail(&pkt->list, list); + } + + ret = idx; + goto done; + +free_pbc_dma: + if (dma_mapped) + dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE); +free_pbc: + if (page) { + kunmap(page); + __free_page(page); + } else + dma_pool_free(pq->header_cache, pbc, dma_addr); +free_pkt: + kmem_cache_free(pq->pkt_slab, pkt); +free_list: + ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list); +done: + return ret; +} + +static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq, + u32 c) +{ + pq->sent_counter = c; +} + +/* try to clean out queue -- needs pq->lock */ +static int ipath_user_sdma_queue_clean(const struct ipath_devdata *dd, + struct ipath_user_sdma_queue *pq) +{ + struct list_head free_list; + struct ipath_user_sdma_pkt *pkt; + struct ipath_user_sdma_pkt *pkt_prev; + int ret = 0; + + INIT_LIST_HEAD(&free_list); + + list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) { + s64 descd = dd->ipath_sdma_descq_removed - pkt->added; + + if (descd < 0) + break; + + list_move_tail(&pkt->list, &free_list); + + /* one more packet cleaned */ + ret++; + } + + if (!list_empty(&free_list)) { + u32 counter; + + pkt = list_entry(free_list.prev, + struct ipath_user_sdma_pkt, list); + counter = pkt->counter; + + ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list); + ipath_user_sdma_set_complete_counter(pq, counter); + } + + return ret; +} + +void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq) +{ + if (!pq) + return; + + kmem_cache_destroy(pq->pkt_slab); + dma_pool_destroy(pq->header_cache); + kfree(pq); +} + +/* clean descriptor queue, returns > 0 if some elements cleaned */ +static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata *dd) +{ + int ret; + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + ret = ipath_sdma_make_progress(dd); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + + return ret; +} + +/* we're in close, drain packets so that we can cleanup successfully... */ +void ipath_user_sdma_queue_drain(struct ipath_devdata *dd, + struct ipath_user_sdma_queue *pq) +{ + int i; + + if (!pq) + return; + + for (i = 0; i < 100; i++) { + mutex_lock(&pq->lock); + if (list_empty(&pq->sent)) { + mutex_unlock(&pq->lock); + break; + } + ipath_user_sdma_hwqueue_clean(dd); + ipath_user_sdma_queue_clean(dd, pq); + mutex_unlock(&pq->lock); + msleep(10); + } + + if (!list_empty(&pq->sent)) { + struct list_head free_list; + + printk(KERN_INFO "drain: lists not empty: forcing!\n"); + INIT_LIST_HEAD(&free_list); + mutex_lock(&pq->lock); + list_splice_init(&pq->sent, &free_list); + ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list); + mutex_unlock(&pq->lock); + } +} + +static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd, + u64 addr, u64 dwlen, u64 dwoffset) +{ + return cpu_to_le64(/* SDmaPhyAddr[31:0] */ + ((addr & 0xfffffffcULL) << 32) | + /* SDmaGeneration[1:0] */ + ((dd->ipath_sdma_generation & 3ULL) << 30) | + /* SDmaDwordCount[10:0] */ + ((dwlen & 0x7ffULL) << 16) | + /* SDmaBufOffset[12:2] */ + (dwoffset & 0x7ffULL)); +} + +static inline __le64 ipath_sdma_make_first_desc0(__le64 descq) +{ + return descq | cpu_to_le64(1ULL << 12); +} + +static inline __le64 ipath_sdma_make_last_desc0(__le64 descq) +{ + /* last */ /* dma head */ + return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13); +} + +static inline __le64 ipath_sdma_make_desc1(u64 addr) +{ + /* SDmaPhyAddr[47:32] */ + return cpu_to_le64(addr >> 32); +} + +static void ipath_user_sdma_send_frag(struct ipath_devdata *dd, + struct ipath_user_sdma_pkt *pkt, int idx, + unsigned ofs, u16 tail) +{ + const u64 addr = (u64) pkt->addr[idx].addr + + (u64) pkt->addr[idx].offset; + const u64 dwlen = (u64) pkt->addr[idx].length / 4; + __le64 *descqp; + __le64 descq0; + + descqp = &dd->ipath_sdma_descq[tail].qw[0]; + + descq0 = ipath_sdma_make_desc0(dd, addr, dwlen, ofs); + if (idx == 0) + descq0 = ipath_sdma_make_first_desc0(descq0); + if (idx == pkt->naddr - 1) + descq0 = ipath_sdma_make_last_desc0(descq0); + + descqp[0] = descq0; + descqp[1] = ipath_sdma_make_desc1(addr); +} + +/* pq->lock must be held, get packets on the wire... */ +static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd, + struct ipath_user_sdma_queue *pq, + struct list_head *pktlist) +{ + int ret = 0; + unsigned long flags; + u16 tail; + + if (list_empty(pktlist)) + return 0; + + if (unlikely(!(dd->ipath_flags & IPATH_LINKACTIVE))) + return -ECOMM; + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + + if (unlikely(dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK)) { + ret = -ECOMM; + goto unlock; + } + + tail = dd->ipath_sdma_descq_tail; + while (!list_empty(pktlist)) { + struct ipath_user_sdma_pkt *pkt = + list_entry(pktlist->next, struct ipath_user_sdma_pkt, + list); + int i; + unsigned ofs = 0; + u16 dtail = tail; + + if (pkt->naddr > ipath_sdma_descq_freecnt(dd)) + goto unlock_check_tail; + + for (i = 0; i < pkt->naddr; i++) { + ipath_user_sdma_send_frag(dd, pkt, i, ofs, tail); + ofs += pkt->addr[i].length >> 2; + + if (++tail == dd->ipath_sdma_descq_cnt) { + tail = 0; + ++dd->ipath_sdma_generation; + } + } + + if ((ofs<<2) > dd->ipath_ibmaxlen) { + ipath_dbg("packet size %X > ibmax %X, fail\n", + ofs<<2, dd->ipath_ibmaxlen); + ret = -EMSGSIZE; + goto unlock; + } + + /* + * if the packet is >= 2KB mtu equivalent, we have to use + * the large buffers, and have to mark each descriptor as + * part of a large buffer packet. + */ + if (ofs >= IPATH_SMALLBUF_DWORDS) { + for (i = 0; i < pkt->naddr; i++) { + dd->ipath_sdma_descq[dtail].qw[0] |= + cpu_to_le64(1ULL << 14); + if (++dtail == dd->ipath_sdma_descq_cnt) + dtail = 0; + } + } + + dd->ipath_sdma_descq_added += pkt->naddr; + pkt->added = dd->ipath_sdma_descq_added; + list_move_tail(&pkt->list, &pq->sent); + ret++; + } + +unlock_check_tail: + /* advance the tail on the chip if necessary */ + if (dd->ipath_sdma_descq_tail != tail) { + wmb(); + ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail); + dd->ipath_sdma_descq_tail = tail; + } + +unlock: + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + + return ret; +} + +int ipath_user_sdma_writev(struct ipath_devdata *dd, + struct ipath_user_sdma_queue *pq, + const struct iovec *iov, + unsigned long dim) +{ + int ret = 0; + struct list_head list; + int npkts = 0; + + INIT_LIST_HEAD(&list); + + mutex_lock(&pq->lock); + + if (dd->ipath_sdma_descq_added != dd->ipath_sdma_descq_removed) { + ipath_user_sdma_hwqueue_clean(dd); + ipath_user_sdma_queue_clean(dd, pq); + } + + while (dim) { + const int mxp = 8; + + down_write(¤t->mm->mmap_sem); + ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp); + up_write(¤t->mm->mmap_sem); + + if (ret <= 0) + goto done_unlock; + else { + dim -= ret; + iov += ret; + } + + /* force packets onto the sdma hw queue... */ + if (!list_empty(&list)) { + /* + * lazily clean hw queue. the 4 is a guess of about + * how many sdma descriptors a packet will take (it + * doesn't have to be perfect). + */ + if (ipath_sdma_descq_freecnt(dd) < ret * 4) { + ipath_user_sdma_hwqueue_clean(dd); + ipath_user_sdma_queue_clean(dd, pq); + } + + ret = ipath_user_sdma_push_pkts(dd, pq, &list); + if (ret < 0) + goto done_unlock; + else { + npkts += ret; + pq->counter += ret; + + if (!list_empty(&list)) + goto done_unlock; + } + } + } + +done_unlock: + if (!list_empty(&list)) + ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list); + mutex_unlock(&pq->lock); + + return (ret < 0) ? ret : npkts; +} + +int ipath_user_sdma_make_progress(struct ipath_devdata *dd, + struct ipath_user_sdma_queue *pq) +{ + int ret = 0; + + mutex_lock(&pq->lock); + ipath_user_sdma_hwqueue_clean(dd); + ret = ipath_user_sdma_queue_clean(dd, pq); + mutex_unlock(&pq->lock); + + return ret; +} + +u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq) +{ + return pq->sent_counter; +} + +u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq) +{ + return pq->counter; +} + diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.h b/drivers/infiniband/hw/ipath/ipath_user_sdma.h new file mode 100644 index 00000000..fc76316c --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include + +struct ipath_user_sdma_queue; + +struct ipath_user_sdma_queue * +ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport); +void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq); + +int ipath_user_sdma_writev(struct ipath_devdata *dd, + struct ipath_user_sdma_queue *pq, + const struct iovec *iov, + unsigned long dim); + +int ipath_user_sdma_make_progress(struct ipath_devdata *dd, + struct ipath_user_sdma_queue *pq); + +void ipath_user_sdma_queue_drain(struct ipath_devdata *dd, + struct ipath_user_sdma_queue *pq); + +u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq); +u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c new file mode 100644 index 00000000..439c35d4 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -0,0 +1,2341 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ipath_kernel.h" +#include "ipath_verbs.h" +#include "ipath_common.h" + +static unsigned int ib_ipath_qp_table_size = 251; +module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO); +MODULE_PARM_DESC(qp_table_size, "QP table size"); + +unsigned int ib_ipath_lkey_table_size = 12; +module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint, + S_IRUGO); +MODULE_PARM_DESC(lkey_table_size, + "LKEY table size in bits (2^n, 1 <= n <= 23)"); + +static unsigned int ib_ipath_max_pds = 0xFFFF; +module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(max_pds, + "Maximum number of protection domains to support"); + +static unsigned int ib_ipath_max_ahs = 0xFFFF; +module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support"); + +unsigned int ib_ipath_max_cqes = 0x2FFFF; +module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(max_cqes, + "Maximum number of completion queue entries to support"); + +unsigned int ib_ipath_max_cqs = 0x1FFFF; +module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support"); + +unsigned int ib_ipath_max_qp_wrs = 0x3FFF; +module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint, + S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); + +unsigned int ib_ipath_max_qps = 16384; +module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); + +unsigned int ib_ipath_max_sges = 0x60; +module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support"); + +unsigned int ib_ipath_max_mcast_grps = 16384; +module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint, + S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(max_mcast_grps, + "Maximum number of multicast groups to support"); + +unsigned int ib_ipath_max_mcast_qp_attached = 16; +module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached, + uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(max_mcast_qp_attached, + "Maximum number of attached QPs to support"); + +unsigned int ib_ipath_max_srqs = 1024; +module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support"); + +unsigned int ib_ipath_max_srq_sges = 128; +module_param_named(max_srq_sges, ib_ipath_max_srq_sges, + uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support"); + +unsigned int ib_ipath_max_srq_wrs = 0x1FFFF; +module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs, + uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); + +static unsigned int ib_ipath_disable_sma; +module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(disable_sma, "Disable the SMA"); + +/* + * Note that it is OK to post send work requests in the SQE and ERR + * states; ipath_do_send() will process them and generate error + * completions as per IB 1.2 C10-96. + */ +const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = 0, + [IB_QPS_INIT] = IPATH_POST_RECV_OK, + [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, + [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | + IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK | + IPATH_PROCESS_NEXT_SEND_OK, + [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | + IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, + [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | + IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, + [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV | + IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, +}; + +struct ipath_ucontext { + struct ib_ucontext ibucontext; +}; + +static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext + *ibucontext) +{ + return container_of(ibucontext, struct ipath_ucontext, ibucontext); +} + +/* + * Translate ib_wr_opcode into ib_wc_opcode. + */ +const enum ib_wc_opcode ib_ipath_wc_opcode[] = { + [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, + [IB_WR_SEND] = IB_WC_SEND, + [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, + [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, + [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD +}; + +/* + * System image GUID. + */ +static __be64 sys_image_guid; + +/** + * ipath_copy_sge - copy data to SGE memory + * @ss: the SGE state + * @data: the data to copy + * @length: the length of the data + */ +void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length) +{ + struct ipath_sge *sge = &ss->sge; + + while (length) { + u32 len = sge->length; + + if (len > length) + len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + memcpy(sge->vaddr, data, len); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr != NULL) { + if (++sge->n >= IPATH_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + data += len; + length -= len; + } +} + +/** + * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func + * @ss: the SGE state + * @length: the number of bytes to skip + */ +void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) +{ + struct ipath_sge *sge = &ss->sge; + + while (length) { + u32 len = sge->length; + + if (len > length) + len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr != NULL) { + if (++sge->n >= IPATH_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + length -= len; + } +} + +/* + * Count the number of DMA descriptors needed to send length bytes of data. + * Don't modify the ipath_sge_state to get the count. + * Return zero if any of the segments is not aligned. + */ +static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length) +{ + struct ipath_sge *sg_list = ss->sg_list; + struct ipath_sge sge = ss->sge; + u8 num_sge = ss->num_sge; + u32 ndesc = 1; /* count the header */ + + while (length) { + u32 len = sge.length; + + if (len > length) + len = length; + if (len > sge.sge_length) + len = sge.sge_length; + BUG_ON(len == 0); + if (((long) sge.vaddr & (sizeof(u32) - 1)) || + (len != length && (len & (sizeof(u32) - 1)))) { + ndesc = 0; + break; + } + ndesc++; + sge.vaddr += len; + sge.length -= len; + sge.sge_length -= len; + if (sge.sge_length == 0) { + if (--num_sge) + sge = *sg_list++; + } else if (sge.length == 0 && sge.mr != NULL) { + if (++sge.n >= IPATH_SEGSZ) { + if (++sge.m >= sge.mr->mapsz) + break; + sge.n = 0; + } + sge.vaddr = + sge.mr->map[sge.m]->segs[sge.n].vaddr; + sge.length = + sge.mr->map[sge.m]->segs[sge.n].length; + } + length -= len; + } + return ndesc; +} + +/* + * Copy from the SGEs to the data buffer. + */ +static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss, + u32 length) +{ + struct ipath_sge *sge = &ss->sge; + + while (length) { + u32 len = sge->length; + + if (len > length) + len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + memcpy(data, sge->vaddr, len); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr != NULL) { + if (++sge->n >= IPATH_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + data += len; + length -= len; + } +} + +/** + * ipath_post_one_send - post one RC, UC, or UD send work request + * @qp: the QP to post on + * @wr: the work request to send + */ +static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) +{ + struct ipath_swqe *wqe; + u32 next; + int i; + int j; + int acc; + int ret; + unsigned long flags; + struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; + + spin_lock_irqsave(&qp->s_lock, flags); + + if (qp->ibqp.qp_type != IB_QPT_SMI && + !(dd->ipath_flags & IPATH_LINKACTIVE)) { + ret = -ENETDOWN; + goto bail; + } + + /* Check that state is OK to post send. */ + if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) + goto bail_inval; + + /* IB spec says that num_sge == 0 is OK. */ + if (wr->num_sge > qp->s_max_sge) + goto bail_inval; + + /* + * Don't allow RDMA reads or atomic operations on UC or + * undefined operations. + * Make sure buffer is large enough to hold the result for atomics. + */ + if (qp->ibqp.qp_type == IB_QPT_UC) { + if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) + goto bail_inval; + } else if (qp->ibqp.qp_type == IB_QPT_UD) { + /* Check UD opcode */ + if (wr->opcode != IB_WR_SEND && + wr->opcode != IB_WR_SEND_WITH_IMM) + goto bail_inval; + /* Check UD destination address PD */ + if (qp->ibqp.pd != wr->wr.ud.ah->pd) + goto bail_inval; + } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) + goto bail_inval; + else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && + (wr->num_sge == 0 || + wr->sg_list[0].length < sizeof(u64) || + wr->sg_list[0].addr & (sizeof(u64) - 1))) + goto bail_inval; + else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) + goto bail_inval; + + next = qp->s_head + 1; + if (next >= qp->s_size) + next = 0; + if (next == qp->s_last) { + ret = -ENOMEM; + goto bail; + } + + wqe = get_swqe_ptr(qp, qp->s_head); + wqe->wr = *wr; + wqe->length = 0; + if (wr->num_sge) { + acc = wr->opcode >= IB_WR_RDMA_READ ? + IB_ACCESS_LOCAL_WRITE : 0; + for (i = 0, j = 0; i < wr->num_sge; i++) { + u32 length = wr->sg_list[i].length; + int ok; + + if (length == 0) + continue; + ok = ipath_lkey_ok(qp, &wqe->sg_list[j], + &wr->sg_list[i], acc); + if (!ok) + goto bail_inval; + wqe->length += length; + j++; + } + wqe->wr.num_sge = j; + } + if (qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_RC) { + if (wqe->length > 0x80000000U) + goto bail_inval; + } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu) + goto bail_inval; + wqe->ssn = qp->s_ssn++; + qp->s_head = next; + + ret = 0; + goto bail; + +bail_inval: + ret = -EINVAL; +bail: + spin_unlock_irqrestore(&qp->s_lock, flags); + return ret; +} + +/** + * ipath_post_send - post a send on a QP + * @ibqp: the QP to post the send on + * @wr: the list of work requests to post + * @bad_wr: the first bad WR is put here + * + * This may be called from interrupt context. + */ +static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct ipath_qp *qp = to_iqp(ibqp); + int err = 0; + + for (; wr; wr = wr->next) { + err = ipath_post_one_send(qp, wr); + if (err) { + *bad_wr = wr; + goto bail; + } + } + + /* Try to do the send work in the caller's context. */ + ipath_do_send((unsigned long) qp); + +bail: + return err; +} + +/** + * ipath_post_receive - post a receive on a QP + * @ibqp: the QP to post the receive on + * @wr: the WR to post + * @bad_wr: the first bad WR is put here + * + * This may be called from interrupt context. + */ +static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct ipath_qp *qp = to_iqp(ibqp); + struct ipath_rwq *wq = qp->r_rq.wq; + unsigned long flags; + int ret; + + /* Check that state is OK to post receive. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) { + *bad_wr = wr; + ret = -EINVAL; + goto bail; + } + + for (; wr; wr = wr->next) { + struct ipath_rwqe *wqe; + u32 next; + int i; + + if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { + *bad_wr = wr; + ret = -EINVAL; + goto bail; + } + + spin_lock_irqsave(&qp->r_rq.lock, flags); + next = wq->head + 1; + if (next >= qp->r_rq.size) + next = 0; + if (next == wq->tail) { + spin_unlock_irqrestore(&qp->r_rq.lock, flags); + *bad_wr = wr; + ret = -ENOMEM; + goto bail; + } + + wqe = get_rwqe_ptr(&qp->r_rq, wq->head); + wqe->wr_id = wr->wr_id; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); + wq->head = next; + spin_unlock_irqrestore(&qp->r_rq.lock, flags); + } + ret = 0; + +bail: + return ret; +} + +/** + * ipath_qp_rcv - processing an incoming packet on a QP + * @dev: the device the packet came on + * @hdr: the packet header + * @has_grh: true if the packet has a GRH + * @data: the packet data + * @tlen: the packet length + * @qp: the QP the packet came on + * + * This is called from ipath_ib_rcv() to process an incoming packet + * for the given QP. + * Called at interrupt level. + */ +static void ipath_qp_rcv(struct ipath_ibdev *dev, + struct ipath_ib_header *hdr, int has_grh, + void *data, u32 tlen, struct ipath_qp *qp) +{ + /* Check for valid receive state. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { + dev->n_pkt_drops++; + return; + } + + switch (qp->ibqp.qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + if (ib_ipath_disable_sma) + break; + /* FALLTHROUGH */ + case IB_QPT_UD: + ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp); + break; + + case IB_QPT_RC: + ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp); + break; + + case IB_QPT_UC: + ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp); + break; + + default: + break; + } +} + +/** + * ipath_ib_rcv - process an incoming packet + * @arg: the device pointer + * @rhdr: the header of the packet + * @data: the packet data + * @tlen: the packet length + * + * This is called from ipath_kreceive() to process an incoming packet at + * interrupt level. Tlen is the length of the header + data + CRC in bytes. + */ +void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, + u32 tlen) +{ + struct ipath_ib_header *hdr = rhdr; + struct ipath_other_headers *ohdr; + struct ipath_qp *qp; + u32 qp_num; + int lnh; + u8 opcode; + u16 lid; + + if (unlikely(dev == NULL)) + goto bail; + + if (unlikely(tlen < 24)) { /* LRH+BTH+CRC */ + dev->rcv_errors++; + goto bail; + } + + /* Check for a valid destination LID (see ch. 7.11.1). */ + lid = be16_to_cpu(hdr->lrh[1]); + if (lid < IPATH_MULTICAST_LID_BASE) { + lid &= ~((1 << dev->dd->ipath_lmc) - 1); + if (unlikely(lid != dev->dd->ipath_lid)) { + dev->rcv_errors++; + goto bail; + } + } + + /* Check for GRH */ + lnh = be16_to_cpu(hdr->lrh[0]) & 3; + if (lnh == IPATH_LRH_BTH) + ohdr = &hdr->u.oth; + else if (lnh == IPATH_LRH_GRH) + ohdr = &hdr->u.l.oth; + else { + dev->rcv_errors++; + goto bail; + } + + opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + dev->opstats[opcode].n_bytes += tlen; + dev->opstats[opcode].n_packets++; + + /* Get the destination QP number. */ + qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK; + if (qp_num == IPATH_MULTICAST_QPN) { + struct ipath_mcast *mcast; + struct ipath_mcast_qp *p; + + if (lnh != IPATH_LRH_GRH) { + dev->n_pkt_drops++; + goto bail; + } + mcast = ipath_mcast_find(&hdr->u.l.grh.dgid); + if (mcast == NULL) { + dev->n_pkt_drops++; + goto bail; + } + dev->n_multicast_rcv++; + list_for_each_entry_rcu(p, &mcast->qp_list, list) + ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp); + /* + * Notify ipath_multicast_detach() if it is waiting for us + * to finish. + */ + if (atomic_dec_return(&mcast->refcount) <= 1) + wake_up(&mcast->wait); + } else { + qp = ipath_lookup_qpn(&dev->qp_table, qp_num); + if (qp) { + dev->n_unicast_rcv++; + ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data, + tlen, qp); + /* + * Notify ipath_destroy_qp() if it is waiting + * for us to finish. + */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } else + dev->n_pkt_drops++; + } + +bail:; +} + +/** + * ipath_ib_timer - verbs timer + * @arg: the device pointer + * + * This is called from ipath_do_rcv_timer() at interrupt level to check for + * QPs which need retransmits and to collect performance numbers. + */ +static void ipath_ib_timer(struct ipath_ibdev *dev) +{ + struct ipath_qp *resend = NULL; + struct ipath_qp *rnr = NULL; + struct list_head *last; + struct ipath_qp *qp; + unsigned long flags; + + if (dev == NULL) + return; + + spin_lock_irqsave(&dev->pending_lock, flags); + /* Start filling the next pending queue. */ + if (++dev->pending_index >= ARRAY_SIZE(dev->pending)) + dev->pending_index = 0; + /* Save any requests still in the new queue, they have timed out. */ + last = &dev->pending[dev->pending_index]; + while (!list_empty(last)) { + qp = list_entry(last->next, struct ipath_qp, timerwait); + list_del_init(&qp->timerwait); + qp->timer_next = resend; + resend = qp; + atomic_inc(&qp->refcount); + } + last = &dev->rnrwait; + if (!list_empty(last)) { + qp = list_entry(last->next, struct ipath_qp, timerwait); + if (--qp->s_rnr_timeout == 0) { + do { + list_del_init(&qp->timerwait); + qp->timer_next = rnr; + rnr = qp; + atomic_inc(&qp->refcount); + if (list_empty(last)) + break; + qp = list_entry(last->next, struct ipath_qp, + timerwait); + } while (qp->s_rnr_timeout == 0); + } + } + /* + * We should only be in the started state if pma_sample_start != 0 + */ + if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED && + --dev->pma_sample_start == 0) { + dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING; + ipath_snapshot_counters(dev->dd, &dev->ipath_sword, + &dev->ipath_rword, + &dev->ipath_spkts, + &dev->ipath_rpkts, + &dev->ipath_xmit_wait); + } + if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) { + if (dev->pma_sample_interval == 0) { + u64 ta, tb, tc, td, te; + + dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE; + ipath_snapshot_counters(dev->dd, &ta, &tb, + &tc, &td, &te); + + dev->ipath_sword = ta - dev->ipath_sword; + dev->ipath_rword = tb - dev->ipath_rword; + dev->ipath_spkts = tc - dev->ipath_spkts; + dev->ipath_rpkts = td - dev->ipath_rpkts; + dev->ipath_xmit_wait = te - dev->ipath_xmit_wait; + } + else + dev->pma_sample_interval--; + } + spin_unlock_irqrestore(&dev->pending_lock, flags); + + /* XXX What if timer fires again while this is running? */ + while (resend != NULL) { + qp = resend; + resend = qp->timer_next; + + spin_lock_irqsave(&qp->s_lock, flags); + if (qp->s_last != qp->s_tail && + ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { + dev->n_timeouts++; + ipath_restart_rc(qp, qp->s_last_psn + 1); + } + spin_unlock_irqrestore(&qp->s_lock, flags); + + /* Notify ipath_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } + while (rnr != NULL) { + qp = rnr; + rnr = qp->timer_next; + + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + + /* Notify ipath_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } +} + +static void update_sge(struct ipath_sge_state *ss, u32 length) +{ + struct ipath_sge *sge = &ss->sge; + + sge->vaddr += length; + sge->length -= length; + sge->sge_length -= length; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr != NULL) { + if (++sge->n >= IPATH_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + return; + sge->n = 0; + } + sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = sge->mr->map[sge->m]->segs[sge->n].length; + } +} + +#ifdef __LITTLE_ENDIAN +static inline u32 get_upper_bits(u32 data, u32 shift) +{ + return data >> shift; +} + +static inline u32 set_upper_bits(u32 data, u32 shift) +{ + return data << shift; +} + +static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) +{ + data <<= ((sizeof(u32) - n) * BITS_PER_BYTE); + data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE); + return data; +} +#else +static inline u32 get_upper_bits(u32 data, u32 shift) +{ + return data << shift; +} + +static inline u32 set_upper_bits(u32 data, u32 shift) +{ + return data >> shift; +} + +static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) +{ + data >>= ((sizeof(u32) - n) * BITS_PER_BYTE); + data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE); + return data; +} +#endif + +static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, + u32 length, unsigned flush_wc) +{ + u32 extra = 0; + u32 data = 0; + u32 last; + + while (1) { + u32 len = ss->sge.length; + u32 off; + + if (len > length) + len = length; + if (len > ss->sge.sge_length) + len = ss->sge.sge_length; + BUG_ON(len == 0); + /* If the source address is not aligned, try to align it. */ + off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); + if (off) { + u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr & + ~(sizeof(u32) - 1)); + u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE); + u32 y; + + y = sizeof(u32) - off; + if (len > y) + len = y; + if (len + extra >= sizeof(u32)) { + data |= set_upper_bits(v, extra * + BITS_PER_BYTE); + len = sizeof(u32) - extra; + if (len == length) { + last = data; + break; + } + __raw_writel(data, piobuf); + piobuf++; + extra = 0; + data = 0; + } else { + /* Clear unused upper bytes */ + data |= clear_upper_bytes(v, len, extra); + if (len == length) { + last = data; + break; + } + extra += len; + } + } else if (extra) { + /* Source address is aligned. */ + u32 *addr = (u32 *) ss->sge.vaddr; + int shift = extra * BITS_PER_BYTE; + int ushift = 32 - shift; + u32 l = len; + + while (l >= sizeof(u32)) { + u32 v = *addr; + + data |= set_upper_bits(v, shift); + __raw_writel(data, piobuf); + data = get_upper_bits(v, ushift); + piobuf++; + addr++; + l -= sizeof(u32); + } + /* + * We still have 'extra' number of bytes leftover. + */ + if (l) { + u32 v = *addr; + + if (l + extra >= sizeof(u32)) { + data |= set_upper_bits(v, shift); + len -= l + extra - sizeof(u32); + if (len == length) { + last = data; + break; + } + __raw_writel(data, piobuf); + piobuf++; + extra = 0; + data = 0; + } else { + /* Clear unused upper bytes */ + data |= clear_upper_bytes(v, l, + extra); + if (len == length) { + last = data; + break; + } + extra += l; + } + } else if (len == length) { + last = data; + break; + } + } else if (len == length) { + u32 w; + + /* + * Need to round up for the last dword in the + * packet. + */ + w = (len + 3) >> 2; + __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1); + piobuf += w - 1; + last = ((u32 *) ss->sge.vaddr)[w - 1]; + break; + } else { + u32 w = len >> 2; + + __iowrite32_copy(piobuf, ss->sge.vaddr, w); + piobuf += w; + + extra = len & (sizeof(u32) - 1); + if (extra) { + u32 v = ((u32 *) ss->sge.vaddr)[w]; + + /* Clear unused upper bytes */ + data = clear_upper_bytes(v, extra, 0); + } + } + update_sge(ss, len); + length -= len; + } + /* Update address before sending packet. */ + update_sge(ss, length); + if (flush_wc) { + /* must flush early everything before trigger word */ + ipath_flush_wc(); + __raw_writel(last, piobuf); + /* be sure trigger word is written */ + ipath_flush_wc(); + } else + __raw_writel(last, piobuf); +} + +/* + * Convert IB rate to delay multiplier. + */ +unsigned ipath_ib_rate_to_mult(enum ib_rate rate) +{ + switch (rate) { + case IB_RATE_2_5_GBPS: return 8; + case IB_RATE_5_GBPS: return 4; + case IB_RATE_10_GBPS: return 2; + case IB_RATE_20_GBPS: return 1; + default: return 0; + } +} + +/* + * Convert delay multiplier to IB rate + */ +static enum ib_rate ipath_mult_to_ib_rate(unsigned mult) +{ + switch (mult) { + case 8: return IB_RATE_2_5_GBPS; + case 4: return IB_RATE_5_GBPS; + case 2: return IB_RATE_10_GBPS; + case 1: return IB_RATE_20_GBPS; + default: return IB_RATE_PORT_CURRENT; + } +} + +static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev) +{ + struct ipath_verbs_txreq *tx = NULL; + unsigned long flags; + + spin_lock_irqsave(&dev->pending_lock, flags); + if (!list_empty(&dev->txreq_free)) { + struct list_head *l = dev->txreq_free.next; + + list_del(l); + tx = list_entry(l, struct ipath_verbs_txreq, txreq.list); + } + spin_unlock_irqrestore(&dev->pending_lock, flags); + return tx; +} + +static inline void put_txreq(struct ipath_ibdev *dev, + struct ipath_verbs_txreq *tx) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->pending_lock, flags); + list_add(&tx->txreq.list, &dev->txreq_free); + spin_unlock_irqrestore(&dev->pending_lock, flags); +} + +static void sdma_complete(void *cookie, int status) +{ + struct ipath_verbs_txreq *tx = cookie; + struct ipath_qp *qp = tx->qp; + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + unsigned long flags; + enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? + IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; + + if (atomic_dec_and_test(&qp->s_dma_busy)) { + spin_lock_irqsave(&qp->s_lock, flags); + if (tx->wqe) + ipath_send_complete(qp, tx->wqe, ibs); + if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && + qp->s_last != qp->s_head) || + (qp->s_flags & IPATH_S_WAIT_DMA)) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + wake_up(&qp->wait_dma); + } else if (tx->wqe) { + spin_lock_irqsave(&qp->s_lock, flags); + ipath_send_complete(qp, tx->wqe, ibs); + spin_unlock_irqrestore(&qp->s_lock, flags); + } + + if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) + kfree(tx->txreq.map_addr); + put_txreq(dev, tx); + + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); +} + +static void decrement_dma_busy(struct ipath_qp *qp) +{ + unsigned long flags; + + if (atomic_dec_and_test(&qp->s_dma_busy)) { + spin_lock_irqsave(&qp->s_lock, flags); + if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && + qp->s_last != qp->s_head) || + (qp->s_flags & IPATH_S_WAIT_DMA)) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + wake_up(&qp->wait_dma); + } +} + +/* + * Compute the number of clock cycles of delay before sending the next packet. + * The multipliers reflect the number of clocks for the fastest rate so + * one tick at 4xDDR is 8 ticks at 1xSDR. + * If the destination port will take longer to receive a packet than + * the outgoing link can send it, we need to delay sending the next packet + * by the difference in time it takes the receiver to receive and the sender + * to send this packet. + * Note that this delay is always correct for UC and RC but not always + * optimal for UD. For UD, the destination HCA can be different for each + * packet, in which case, we could send packets to a different destination + * while "waiting" for the delay. The overhead for doing this without + * HW support is more than just paying the cost of delaying some packets + * unnecessarily. + */ +static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult) +{ + return (rcv_mult > snd_mult) ? + (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0; +} + +static int ipath_verbs_send_dma(struct ipath_qp *qp, + struct ipath_ib_header *hdr, u32 hdrwords, + struct ipath_sge_state *ss, u32 len, + u32 plen, u32 dwords) +{ + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + struct ipath_devdata *dd = dev->dd; + struct ipath_verbs_txreq *tx; + u32 *piobuf; + u32 control; + u32 ndesc; + int ret; + + tx = qp->s_tx; + if (tx) { + qp->s_tx = NULL; + /* resend previously constructed packet */ + atomic_inc(&qp->s_dma_busy); + ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx); + if (ret) { + qp->s_tx = tx; + decrement_dma_busy(qp); + } + goto bail; + } + + tx = get_txreq(dev); + if (!tx) { + ret = -EBUSY; + goto bail; + } + + /* + * Get the saved delay count we computed for the previous packet + * and save the delay count for this packet to be used next time + * we get here. + */ + control = qp->s_pkt_delay; + qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult); + + tx->qp = qp; + atomic_inc(&qp->refcount); + tx->wqe = qp->s_wqe; + tx->txreq.callback = sdma_complete; + tx->txreq.callback_cookie = tx; + tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST | + IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC; + if (plen + 1 >= IPATH_SMALLBUF_DWORDS) + tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF; + + /* VL15 packets bypass credit check */ + if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) { + control |= 1ULL << 31; + tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15; + } + + if (len) { + /* + * Don't try to DMA if it takes more descriptors than + * the queue holds. + */ + ndesc = ipath_count_sge(ss, len); + if (ndesc >= dd->ipath_sdma_descq_cnt) + ndesc = 0; + } else + ndesc = 1; + if (ndesc) { + tx->hdr.pbc[0] = cpu_to_le32(plen); + tx->hdr.pbc[1] = cpu_to_le32(control); + memcpy(&tx->hdr.hdr, hdr, hdrwords << 2); + tx->txreq.sg_count = ndesc; + tx->map_len = (hdrwords + 2) << 2; + tx->txreq.map_addr = &tx->hdr; + atomic_inc(&qp->s_dma_busy); + ret = ipath_sdma_verbs_send(dd, ss, dwords, tx); + if (ret) { + /* save ss and length in dwords */ + tx->ss = ss; + tx->len = dwords; + qp->s_tx = tx; + decrement_dma_busy(qp); + } + goto bail; + } + + /* Allocate a buffer and copy the header and payload to it. */ + tx->map_len = (plen + 1) << 2; + piobuf = kmalloc(tx->map_len, GFP_ATOMIC); + if (unlikely(piobuf == NULL)) { + ret = -EBUSY; + goto err_tx; + } + tx->txreq.map_addr = piobuf; + tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF; + tx->txreq.sg_count = 1; + + *piobuf++ = (__force u32) cpu_to_le32(plen); + *piobuf++ = (__force u32) cpu_to_le32(control); + memcpy(piobuf, hdr, hdrwords << 2); + ipath_copy_from_sge(piobuf + hdrwords, ss, len); + + atomic_inc(&qp->s_dma_busy); + ret = ipath_sdma_verbs_send(dd, NULL, 0, tx); + /* + * If we couldn't queue the DMA request, save the info + * and try again later rather than destroying the + * buffer and undoing the side effects of the copy. + */ + if (ret) { + tx->ss = NULL; + tx->len = 0; + qp->s_tx = tx; + decrement_dma_busy(qp); + } + dev->n_unaligned++; + goto bail; + +err_tx: + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + put_txreq(dev, tx); +bail: + return ret; +} + +static int ipath_verbs_send_pio(struct ipath_qp *qp, + struct ipath_ib_header *ibhdr, u32 hdrwords, + struct ipath_sge_state *ss, u32 len, + u32 plen, u32 dwords) +{ + struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; + u32 *hdr = (u32 *) ibhdr; + u32 __iomem *piobuf; + unsigned flush_wc; + u32 control; + int ret; + unsigned long flags; + + piobuf = ipath_getpiobuf(dd, plen, NULL); + if (unlikely(piobuf == NULL)) { + ret = -EBUSY; + goto bail; + } + + /* + * Get the saved delay count we computed for the previous packet + * and save the delay count for this packet to be used next time + * we get here. + */ + control = qp->s_pkt_delay; + qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult); + + /* VL15 packets bypass credit check */ + if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15) + control |= 1ULL << 31; + + /* + * Write the length to the control qword plus any needed flags. + * We have to flush after the PBC for correctness on some cpus + * or WC buffer can be written out of order. + */ + writeq(((u64) control << 32) | plen, piobuf); + piobuf += 2; + + flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC; + if (len == 0) { + /* + * If there is just the header portion, must flush before + * writing last word of header for correctness, and after + * the last header word (trigger word). + */ + if (flush_wc) { + ipath_flush_wc(); + __iowrite32_copy(piobuf, hdr, hdrwords - 1); + ipath_flush_wc(); + __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); + ipath_flush_wc(); + } else + __iowrite32_copy(piobuf, hdr, hdrwords); + goto done; + } + + if (flush_wc) + ipath_flush_wc(); + __iowrite32_copy(piobuf, hdr, hdrwords); + piobuf += hdrwords; + + /* The common case is aligned and contained in one segment. */ + if (likely(ss->num_sge == 1 && len <= ss->sge.length && + !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { + u32 *addr = (u32 *) ss->sge.vaddr; + + /* Update address before sending packet. */ + update_sge(ss, len); + if (flush_wc) { + __iowrite32_copy(piobuf, addr, dwords - 1); + /* must flush early everything before trigger word */ + ipath_flush_wc(); + __raw_writel(addr[dwords - 1], piobuf + dwords - 1); + /* be sure trigger word is written */ + ipath_flush_wc(); + } else + __iowrite32_copy(piobuf, addr, dwords); + goto done; + } + copy_io(piobuf, ss, len, flush_wc); +done: + if (qp->s_wqe) { + spin_lock_irqsave(&qp->s_lock, flags); + ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); + spin_unlock_irqrestore(&qp->s_lock, flags); + } + ret = 0; +bail: + return ret; +} + +/** + * ipath_verbs_send - send a packet + * @qp: the QP to send on + * @hdr: the packet header + * @hdrwords: the number of 32-bit words in the header + * @ss: the SGE to send + * @len: the length of the packet in bytes + */ +int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, + u32 hdrwords, struct ipath_sge_state *ss, u32 len) +{ + struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; + u32 plen; + int ret; + u32 dwords = (len + 3) >> 2; + + /* + * Calculate the send buffer trigger address. + * The +1 counts for the pbc control dword following the pbc length. + */ + plen = hdrwords + dwords + 1; + + /* + * VL15 packets (IB_QPT_SMI) will always use PIO, so we + * can defer SDMA restart until link goes ACTIVE without + * worrying about just how we got there. + */ + if (qp->ibqp.qp_type == IB_QPT_SMI || + !(dd->ipath_flags & IPATH_HAS_SEND_DMA)) + ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, + plen, dwords); + else + ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, + plen, dwords); + + return ret; +} + +int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords, + u64 *rwords, u64 *spkts, u64 *rpkts, + u64 *xmit_wait) +{ + int ret; + + if (!(dd->ipath_flags & IPATH_INITTED)) { + /* no hardware, freeze, etc. */ + ret = -EINVAL; + goto bail; + } + *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); + *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); + *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); + *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); + *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt); + + ret = 0; + +bail: + return ret; +} + +/** + * ipath_get_counters - get various chip counters + * @dd: the infinipath device + * @cntrs: counters are placed here + * + * Return the counters needed by recv_pma_get_portcounters(). + */ +int ipath_get_counters(struct ipath_devdata *dd, + struct ipath_verbs_counters *cntrs) +{ + struct ipath_cregs const *crp = dd->ipath_cregs; + int ret; + + if (!(dd->ipath_flags & IPATH_INITTED)) { + /* no hardware, freeze, etc. */ + ret = -EINVAL; + goto bail; + } + cntrs->symbol_error_counter = + ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt); + cntrs->link_error_recovery_counter = + ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt); + /* + * The link downed counter counts when the other side downs the + * connection. We add in the number of times we downed the link + * due to local link integrity errors to compensate. + */ + cntrs->link_downed_counter = + ipath_snap_cntr(dd, crp->cr_iblinkdowncnt); + cntrs->port_rcv_errors = + ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) + + ipath_snap_cntr(dd, crp->cr_rcvovflcnt) + + ipath_snap_cntr(dd, crp->cr_portovflcnt) + + ipath_snap_cntr(dd, crp->cr_err_rlencnt) + + ipath_snap_cntr(dd, crp->cr_invalidrlencnt) + + ipath_snap_cntr(dd, crp->cr_errlinkcnt) + + ipath_snap_cntr(dd, crp->cr_erricrccnt) + + ipath_snap_cntr(dd, crp->cr_errvcrccnt) + + ipath_snap_cntr(dd, crp->cr_errlpcrccnt) + + ipath_snap_cntr(dd, crp->cr_badformatcnt) + + dd->ipath_rxfc_unsupvl_errs; + if (crp->cr_rxotherlocalphyerrcnt) + cntrs->port_rcv_errors += + ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt); + if (crp->cr_rxvlerrcnt) + cntrs->port_rcv_errors += + ipath_snap_cntr(dd, crp->cr_rxvlerrcnt); + cntrs->port_rcv_remphys_errors = + ipath_snap_cntr(dd, crp->cr_rcvebpcnt); + cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt); + cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt); + cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt); + cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt); + cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt); + cntrs->local_link_integrity_errors = + crp->cr_locallinkintegrityerrcnt ? + ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) : + ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? + dd->ipath_lli_errs : dd->ipath_lli_errors); + cntrs->excessive_buffer_overrun_errors = + crp->cr_excessbufferovflcnt ? + ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) : + dd->ipath_overrun_thresh_errs; + cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ? + ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0; + + ret = 0; + +bail: + return ret; +} + +/** + * ipath_ib_piobufavail - callback when a PIO buffer is available + * @arg: the device pointer + * + * This is called from ipath_intr() at interrupt level when a PIO buffer is + * available after ipath_verbs_send() returned an error that no buffers were + * available. Return 1 if we consumed all the PIO buffers and we still have + * QPs waiting for buffers (for now, just restart the send tasklet and + * return zero). + */ +int ipath_ib_piobufavail(struct ipath_ibdev *dev) +{ + struct list_head *list; + struct ipath_qp *qplist; + struct ipath_qp *qp; + unsigned long flags; + + if (dev == NULL) + goto bail; + + list = &dev->piowait; + qplist = NULL; + + spin_lock_irqsave(&dev->pending_lock, flags); + while (!list_empty(list)) { + qp = list_entry(list->next, struct ipath_qp, piowait); + list_del_init(&qp->piowait); + qp->pio_next = qplist; + qplist = qp; + atomic_inc(&qp->refcount); + } + spin_unlock_irqrestore(&dev->pending_lock, flags); + + while (qplist != NULL) { + qp = qplist; + qplist = qp->pio_next; + + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + + /* Notify ipath_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } + +bail: + return 0; +} + +static int ipath_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct ipath_ibdev *dev = to_idev(ibdev); + + memset(props, 0, sizeof(*props)); + + props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | + IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | + IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; + props->page_size_cap = PAGE_SIZE; + props->vendor_id = + IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3; + props->vendor_part_id = dev->dd->ipath_deviceid; + props->hw_ver = dev->dd->ipath_pcirev; + + props->sys_image_guid = dev->sys_image_guid; + + props->max_mr_size = ~0ull; + props->max_qp = ib_ipath_max_qps; + props->max_qp_wr = ib_ipath_max_qp_wrs; + props->max_sge = ib_ipath_max_sges; + props->max_cq = ib_ipath_max_cqs; + props->max_ah = ib_ipath_max_ahs; + props->max_cqe = ib_ipath_max_cqes; + props->max_mr = dev->lk_table.max; + props->max_fmr = dev->lk_table.max; + props->max_map_per_fmr = 32767; + props->max_pd = ib_ipath_max_pds; + props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC; + props->max_qp_init_rd_atom = 255; + /* props->max_res_rd_atom */ + props->max_srq = ib_ipath_max_srqs; + props->max_srq_wr = ib_ipath_max_srq_wrs; + props->max_srq_sge = ib_ipath_max_srq_sges; + /* props->local_ca_ack_delay */ + props->atomic_cap = IB_ATOMIC_GLOB; + props->max_pkeys = ipath_get_npkeys(dev->dd); + props->max_mcast_grp = ib_ipath_max_mcast_grps; + props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_mcast_grp; + + return 0; +} + +const u8 ipath_cvt_physportstate[32] = { + [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED, + [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP, + [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL, + [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL, + [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP, + [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP, + [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN, + [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN +}; + +u32 ipath_get_cr_errpkey(struct ipath_devdata *dd) +{ + return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey); +} + +static int ipath_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_devdata *dd = dev->dd; + enum ib_mtu mtu; + u16 lid = dd->ipath_lid; + u64 ibcstat; + + memset(props, 0, sizeof(*props)); + props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE); + props->lmc = dd->ipath_lmc; + props->sm_lid = dev->sm_lid; + props->sm_sl = dev->sm_sl; + ibcstat = dd->ipath_lastibcstat; + /* map LinkState to IB portinfo values. */ + props->state = ipath_ib_linkstate(dd, ibcstat) + 1; + + /* See phys_state_show() */ + props->phys_state = /* MEA: assumes shift == 0 */ + ipath_cvt_physportstate[dd->ipath_lastibcstat & + dd->ibcs_lts_mask]; + props->port_cap_flags = dev->port_cap_flags; + props->gid_tbl_len = 1; + props->max_msg_sz = 0x80000000; + props->pkey_tbl_len = ipath_get_npkeys(dd); + props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) - + dev->z_pkey_violations; + props->qkey_viol_cntr = dev->qkey_violations; + props->active_width = dd->ipath_link_width_active; + /* See rate_show() */ + props->active_speed = dd->ipath_link_speed_active; + props->max_vl_num = 1; /* VLCap = VL0 */ + props->init_type_reply = 0; + + props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048; + switch (dd->ipath_ibmtu) { + case 4096: + mtu = IB_MTU_4096; + break; + case 2048: + mtu = IB_MTU_2048; + break; + case 1024: + mtu = IB_MTU_1024; + break; + case 512: + mtu = IB_MTU_512; + break; + case 256: + mtu = IB_MTU_256; + break; + default: + mtu = IB_MTU_2048; + } + props->active_mtu = mtu; + props->subnet_timeout = dev->subnet_timeout; + + return 0; +} + +static int ipath_modify_device(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify) +{ + int ret; + + if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | + IB_DEVICE_MODIFY_NODE_DESC)) { + ret = -EOPNOTSUPP; + goto bail; + } + + if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) + memcpy(device->node_desc, device_modify->node_desc, 64); + + if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) + to_idev(device)->sys_image_guid = + cpu_to_be64(device_modify->sys_image_guid); + + ret = 0; + +bail: + return ret; +} + +static int ipath_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props) +{ + struct ipath_ibdev *dev = to_idev(ibdev); + + dev->port_cap_flags |= props->set_port_cap_mask; + dev->port_cap_flags &= ~props->clr_port_cap_mask; + if (port_modify_mask & IB_PORT_SHUTDOWN) + ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN); + if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) + dev->qkey_violations = 0; + return 0; +} + +static int ipath_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + struct ipath_ibdev *dev = to_idev(ibdev); + int ret; + + if (index >= 1) { + ret = -EINVAL; + goto bail; + } + gid->global.subnet_prefix = dev->gid_prefix; + gid->global.interface_id = dev->dd->ipath_guid; + + ret = 0; + +bail: + return ret; +} + +static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_pd *pd; + struct ib_pd *ret; + + /* + * This is actually totally arbitrary. Some correctness tests + * assume there's a maximum number of PDs that can be allocated. + * We don't actually have this limit, but we fail the test if + * we allow allocations of more than we report for this value. + */ + + pd = kmalloc(sizeof *pd, GFP_KERNEL); + if (!pd) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + spin_lock(&dev->n_pds_lock); + if (dev->n_pds_allocated == ib_ipath_max_pds) { + spin_unlock(&dev->n_pds_lock); + kfree(pd); + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + dev->n_pds_allocated++; + spin_unlock(&dev->n_pds_lock); + + /* ib_alloc_pd() will initialize pd->ibpd. */ + pd->user = udata != NULL; + + ret = &pd->ibpd; + +bail: + return ret; +} + +static int ipath_dealloc_pd(struct ib_pd *ibpd) +{ + struct ipath_pd *pd = to_ipd(ibpd); + struct ipath_ibdev *dev = to_idev(ibpd->device); + + spin_lock(&dev->n_pds_lock); + dev->n_pds_allocated--; + spin_unlock(&dev->n_pds_lock); + + kfree(pd); + + return 0; +} + +/** + * ipath_create_ah - create an address handle + * @pd: the protection domain + * @ah_attr: the attributes of the AH + * + * This may be called from interrupt context. + */ +static struct ib_ah *ipath_create_ah(struct ib_pd *pd, + struct ib_ah_attr *ah_attr) +{ + struct ipath_ah *ah; + struct ib_ah *ret; + struct ipath_ibdev *dev = to_idev(pd->device); + unsigned long flags; + + /* A multicast address requires a GRH (see ch. 8.4.1). */ + if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && + ah_attr->dlid != IPATH_PERMISSIVE_LID && + !(ah_attr->ah_flags & IB_AH_GRH)) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + + if (ah_attr->dlid == 0) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + + if (ah_attr->port_num < 1 || + ah_attr->port_num > pd->device->phys_port_cnt) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + + ah = kmalloc(sizeof *ah, GFP_ATOMIC); + if (!ah) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + spin_lock_irqsave(&dev->n_ahs_lock, flags); + if (dev->n_ahs_allocated == ib_ipath_max_ahs) { + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + kfree(ah); + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + dev->n_ahs_allocated++; + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + + /* ib_create_ah() will initialize ah->ibah. */ + ah->attr = *ah_attr; + ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate); + + ret = &ah->ibah; + +bail: + return ret; +} + +/** + * ipath_destroy_ah - destroy an address handle + * @ibah: the AH to destroy + * + * This may be called from interrupt context. + */ +static int ipath_destroy_ah(struct ib_ah *ibah) +{ + struct ipath_ibdev *dev = to_idev(ibah->device); + struct ipath_ah *ah = to_iah(ibah); + unsigned long flags; + + spin_lock_irqsave(&dev->n_ahs_lock, flags); + dev->n_ahs_allocated--; + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + + kfree(ah); + + return 0; +} + +static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) +{ + struct ipath_ah *ah = to_iah(ibah); + + *ah_attr = ah->attr; + ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate); + + return 0; +} + +/** + * ipath_get_npkeys - return the size of the PKEY table for port 0 + * @dd: the infinipath device + */ +unsigned ipath_get_npkeys(struct ipath_devdata *dd) +{ + return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys); +} + +/** + * ipath_get_pkey - return the indexed PKEY from the port PKEY table + * @dd: the infinipath device + * @index: the PKEY index + */ +unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index) +{ + unsigned ret; + + /* always a kernel port, no locking needed */ + if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys)) + ret = 0; + else + ret = dd->ipath_pd[0]->port_pkeys[index]; + + return ret; +} + +static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + struct ipath_ibdev *dev = to_idev(ibdev); + int ret; + + if (index >= ipath_get_npkeys(dev->dd)) { + ret = -EINVAL; + goto bail; + } + + *pkey = ipath_get_pkey(dev->dd, index); + ret = 0; + +bail: + return ret; +} + +/** + * ipath_alloc_ucontext - allocate a ucontest + * @ibdev: the infiniband device + * @udata: not used by the InfiniPath driver + */ + +static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct ipath_ucontext *context; + struct ib_ucontext *ret; + + context = kmalloc(sizeof *context, GFP_KERNEL); + if (!context) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + ret = &context->ibucontext; + +bail: + return ret; +} + +static int ipath_dealloc_ucontext(struct ib_ucontext *context) +{ + kfree(to_iucontext(context)); + return 0; +} + +static int ipath_verbs_register_sysfs(struct ib_device *dev); + +static void __verbs_timer(unsigned long arg) +{ + struct ipath_devdata *dd = (struct ipath_devdata *) arg; + + /* Handle verbs layer timeouts. */ + ipath_ib_timer(dd->verbs_dev); + + mod_timer(&dd->verbs_timer, jiffies + 1); +} + +static int enable_timer(struct ipath_devdata *dd) +{ + /* + * Early chips had a design flaw where the chip and kernel idea + * of the tail register don't always agree, and therefore we won't + * get an interrupt on the next packet received. + * If the board supports per packet receive interrupts, use it. + * Otherwise, the timer function periodically checks for packets + * to cover this case. + * Either way, the timer is needed for verbs layer related + * processing. + */ + if (dd->ipath_flags & IPATH_GPIO_INTR) { + ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, + 0x2074076542310ULL); + /* Enable GPIO bit 2 interrupt */ + dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT); + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, + dd->ipath_gpio_mask); + } + + init_timer(&dd->verbs_timer); + dd->verbs_timer.function = __verbs_timer; + dd->verbs_timer.data = (unsigned long)dd; + dd->verbs_timer.expires = jiffies + 1; + add_timer(&dd->verbs_timer); + + return 0; +} + +static int disable_timer(struct ipath_devdata *dd) +{ + /* Disable GPIO bit 2 interrupt */ + if (dd->ipath_flags & IPATH_GPIO_INTR) { + /* Disable GPIO bit 2 interrupt */ + dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT)); + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, + dd->ipath_gpio_mask); + /* + * We might want to undo changes to debugportselect, + * but how? + */ + } + + del_timer_sync(&dd->verbs_timer); + + return 0; +} + +/** + * ipath_register_ib_device - register our device with the infiniband core + * @dd: the device data structure + * Return the allocated ipath_ibdev pointer or NULL on error. + */ +int ipath_register_ib_device(struct ipath_devdata *dd) +{ + struct ipath_verbs_counters cntrs; + struct ipath_ibdev *idev; + struct ib_device *dev; + struct ipath_verbs_txreq *tx; + unsigned i; + int ret; + + idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev); + if (idev == NULL) { + ret = -ENOMEM; + goto bail; + } + + dev = &idev->ibdev; + + if (dd->ipath_sdma_descq_cnt) { + tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx, + GFP_KERNEL); + if (tx == NULL) { + ret = -ENOMEM; + goto err_tx; + } + } else + tx = NULL; + idev->txreq_bufs = tx; + + /* Only need to initialize non-zero fields. */ + spin_lock_init(&idev->n_pds_lock); + spin_lock_init(&idev->n_ahs_lock); + spin_lock_init(&idev->n_cqs_lock); + spin_lock_init(&idev->n_qps_lock); + spin_lock_init(&idev->n_srqs_lock); + spin_lock_init(&idev->n_mcast_grps_lock); + + spin_lock_init(&idev->qp_table.lock); + spin_lock_init(&idev->lk_table.lock); + idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE); + /* Set the prefix to the default value (see ch. 4.1.1) */ + idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL); + + ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size); + if (ret) + goto err_qp; + + /* + * The top ib_ipath_lkey_table_size bits are used to index the + * table. The lower 8 bits can be owned by the user (copied from + * the LKEY). The remaining bits act as a generation number or tag. + */ + idev->lk_table.max = 1 << ib_ipath_lkey_table_size; + idev->lk_table.table = kzalloc(idev->lk_table.max * + sizeof(*idev->lk_table.table), + GFP_KERNEL); + if (idev->lk_table.table == NULL) { + ret = -ENOMEM; + goto err_lk; + } + INIT_LIST_HEAD(&idev->pending_mmaps); + spin_lock_init(&idev->pending_lock); + idev->mmap_offset = PAGE_SIZE; + spin_lock_init(&idev->mmap_offset_lock); + INIT_LIST_HEAD(&idev->pending[0]); + INIT_LIST_HEAD(&idev->pending[1]); + INIT_LIST_HEAD(&idev->pending[2]); + INIT_LIST_HEAD(&idev->piowait); + INIT_LIST_HEAD(&idev->rnrwait); + INIT_LIST_HEAD(&idev->txreq_free); + idev->pending_index = 0; + idev->port_cap_flags = + IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP; + if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY) + idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP; + idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; + idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; + idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; + idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; + idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; + + /* Snapshot current HW counters to "clear" them. */ + ipath_get_counters(dd, &cntrs); + idev->z_symbol_error_counter = cntrs.symbol_error_counter; + idev->z_link_error_recovery_counter = + cntrs.link_error_recovery_counter; + idev->z_link_downed_counter = cntrs.link_downed_counter; + idev->z_port_rcv_errors = cntrs.port_rcv_errors; + idev->z_port_rcv_remphys_errors = + cntrs.port_rcv_remphys_errors; + idev->z_port_xmit_discards = cntrs.port_xmit_discards; + idev->z_port_xmit_data = cntrs.port_xmit_data; + idev->z_port_rcv_data = cntrs.port_rcv_data; + idev->z_port_xmit_packets = cntrs.port_xmit_packets; + idev->z_port_rcv_packets = cntrs.port_rcv_packets; + idev->z_local_link_integrity_errors = + cntrs.local_link_integrity_errors; + idev->z_excessive_buffer_overrun_errors = + cntrs.excessive_buffer_overrun_errors; + idev->z_vl15_dropped = cntrs.vl15_dropped; + + for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++) + list_add(&tx->txreq.list, &idev->txreq_free); + + /* + * The system image GUID is supposed to be the same for all + * IB HCAs in a single system but since there can be other + * device types in the system, we can't be sure this is unique. + */ + if (!sys_image_guid) + sys_image_guid = dd->ipath_guid; + idev->sys_image_guid = sys_image_guid; + idev->ib_unit = dd->ipath_unit; + idev->dd = dd; + + strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX); + dev->owner = THIS_MODULE; + dev->node_guid = dd->ipath_guid; + dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION; + dev->uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_CREATE_AH) | + (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | + (1ull << IB_USER_VERBS_CMD_QUERY_AH) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); + dev->node_type = RDMA_NODE_IB_CA; + dev->phys_port_cnt = 1; + dev->num_comp_vectors = 1; + dev->dma_device = &dd->pcidev->dev; + dev->query_device = ipath_query_device; + dev->modify_device = ipath_modify_device; + dev->query_port = ipath_query_port; + dev->modify_port = ipath_modify_port; + dev->query_pkey = ipath_query_pkey; + dev->query_gid = ipath_query_gid; + dev->alloc_ucontext = ipath_alloc_ucontext; + dev->dealloc_ucontext = ipath_dealloc_ucontext; + dev->alloc_pd = ipath_alloc_pd; + dev->dealloc_pd = ipath_dealloc_pd; + dev->create_ah = ipath_create_ah; + dev->destroy_ah = ipath_destroy_ah; + dev->query_ah = ipath_query_ah; + dev->create_srq = ipath_create_srq; + dev->modify_srq = ipath_modify_srq; + dev->query_srq = ipath_query_srq; + dev->destroy_srq = ipath_destroy_srq; + dev->create_qp = ipath_create_qp; + dev->modify_qp = ipath_modify_qp; + dev->query_qp = ipath_query_qp; + dev->destroy_qp = ipath_destroy_qp; + dev->post_send = ipath_post_send; + dev->post_recv = ipath_post_receive; + dev->post_srq_recv = ipath_post_srq_receive; + dev->create_cq = ipath_create_cq; + dev->destroy_cq = ipath_destroy_cq; + dev->resize_cq = ipath_resize_cq; + dev->poll_cq = ipath_poll_cq; + dev->req_notify_cq = ipath_req_notify_cq; + dev->get_dma_mr = ipath_get_dma_mr; + dev->reg_phys_mr = ipath_reg_phys_mr; + dev->reg_user_mr = ipath_reg_user_mr; + dev->dereg_mr = ipath_dereg_mr; + dev->alloc_fmr = ipath_alloc_fmr; + dev->map_phys_fmr = ipath_map_phys_fmr; + dev->unmap_fmr = ipath_unmap_fmr; + dev->dealloc_fmr = ipath_dealloc_fmr; + dev->attach_mcast = ipath_multicast_attach; + dev->detach_mcast = ipath_multicast_detach; + dev->process_mad = ipath_process_mad; + dev->mmap = ipath_mmap; + dev->dma_ops = &ipath_dma_mapping_ops; + + snprintf(dev->node_desc, sizeof(dev->node_desc), + IPATH_IDSTR " %s", init_utsname()->nodename); + + ret = ib_register_device(dev, NULL); + if (ret) + goto err_reg; + + if (ipath_verbs_register_sysfs(dev)) + goto err_class; + + enable_timer(dd); + + goto bail; + +err_class: + ib_unregister_device(dev); +err_reg: + kfree(idev->lk_table.table); +err_lk: + kfree(idev->qp_table.table); +err_qp: + kfree(idev->txreq_bufs); +err_tx: + ib_dealloc_device(dev); + ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret); + idev = NULL; + +bail: + dd->verbs_dev = idev; + return ret; +} + +void ipath_unregister_ib_device(struct ipath_ibdev *dev) +{ + struct ib_device *ibdev = &dev->ibdev; + u32 qps_inuse; + + ib_unregister_device(ibdev); + + disable_timer(dev->dd); + + if (!list_empty(&dev->pending[0]) || + !list_empty(&dev->pending[1]) || + !list_empty(&dev->pending[2])) + ipath_dev_err(dev->dd, "pending list not empty!\n"); + if (!list_empty(&dev->piowait)) + ipath_dev_err(dev->dd, "piowait list not empty!\n"); + if (!list_empty(&dev->rnrwait)) + ipath_dev_err(dev->dd, "rnrwait list not empty!\n"); + if (!ipath_mcast_tree_empty()) + ipath_dev_err(dev->dd, "multicast table memory leak!\n"); + /* + * Note that ipath_unregister_ib_device() can be called before all + * the QPs are destroyed! + */ + qps_inuse = ipath_free_all_qps(&dev->qp_table); + if (qps_inuse) + ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n", + qps_inuse); + kfree(dev->qp_table.table); + kfree(dev->lk_table.table); + kfree(dev->txreq_bufs); + ib_dealloc_device(ibdev); +} + +static ssize_t show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ipath_ibdev *dev = + container_of(device, struct ipath_ibdev, ibdev.dev); + + return sprintf(buf, "%x\n", dev->dd->ipath_pcirev); +} + +static ssize_t show_hca(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ipath_ibdev *dev = + container_of(device, struct ipath_ibdev, ibdev.dev); + int ret; + + ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128); + if (ret < 0) + goto bail; + strcat(buf, "\n"); + ret = strlen(buf); + +bail: + return ret; +} + +static ssize_t show_stats(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ipath_ibdev *dev = + container_of(device, struct ipath_ibdev, ibdev.dev); + int i; + int len; + + len = sprintf(buf, + "RC resends %d\n" + "RC no QACK %d\n" + "RC ACKs %d\n" + "RC SEQ NAKs %d\n" + "RC RDMA seq %d\n" + "RC RNR NAKs %d\n" + "RC OTH NAKs %d\n" + "RC timeouts %d\n" + "RC RDMA dup %d\n" + "piobuf wait %d\n" + "unaligned %d\n" + "PKT drops %d\n" + "WQE errs %d\n", + dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, + dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, + dev->n_other_naks, dev->n_timeouts, + dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned, + dev->n_pkt_drops, dev->n_wqe_errs); + for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { + const struct ipath_opcode_stats *si = &dev->opstats[i]; + + if (!si->n_packets && !si->n_bytes) + continue; + len += sprintf(buf + len, "%02x %llu/%llu\n", i, + (unsigned long long) si->n_packets, + (unsigned long long) si->n_bytes); + } + return len; +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL); + +static struct device_attribute *ipath_class_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_hca_type, + &dev_attr_board_id, + &dev_attr_stats +}; + +static int ipath_verbs_register_sysfs(struct ib_device *dev) +{ + int i; + int ret; + + for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) + if (device_create_file(&dev->dev, + ipath_class_attributes[i])) { + ret = 1; + goto bail; + } + + ret = 0; + +bail: + return ret; +} diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h new file mode 100644 index 00000000..ae6cff4a --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -0,0 +1,936 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IPATH_VERBS_H +#define IPATH_VERBS_H + +#include +#include +#include +#include +#include +#include +#include + +#include "ipath_kernel.h" + +#define IPATH_MAX_RDMA_ATOMIC 4 + +#define QPN_MAX (1 << 24) +#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define IPATH_UVERBS_ABI_VERSION 2 + +/* + * Define an ib_cq_notify value that is not valid so we know when CQ + * notifications are armed. + */ +#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1) + +/* AETH NAK opcode values */ +#define IB_RNR_NAK 0x20 +#define IB_NAK_PSN_ERROR 0x60 +#define IB_NAK_INVALID_REQUEST 0x61 +#define IB_NAK_REMOTE_ACCESS_ERROR 0x62 +#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 +#define IB_NAK_INVALID_RD_REQUEST 0x64 + +/* Flags for checking QP state (see ib_ipath_state_ops[]) */ +#define IPATH_POST_SEND_OK 0x01 +#define IPATH_POST_RECV_OK 0x02 +#define IPATH_PROCESS_RECV_OK 0x04 +#define IPATH_PROCESS_SEND_OK 0x08 +#define IPATH_PROCESS_NEXT_SEND_OK 0x10 +#define IPATH_FLUSH_SEND 0x20 +#define IPATH_FLUSH_RECV 0x40 +#define IPATH_PROCESS_OR_FLUSH_SEND \ + (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND) + +/* IB Performance Manager status values */ +#define IB_PMA_SAMPLE_STATUS_DONE 0x00 +#define IB_PMA_SAMPLE_STATUS_STARTED 0x01 +#define IB_PMA_SAMPLE_STATUS_RUNNING 0x02 + +/* Mandatory IB performance counter select values. */ +#define IB_PMA_PORT_XMIT_DATA cpu_to_be16(0x0001) +#define IB_PMA_PORT_RCV_DATA cpu_to_be16(0x0002) +#define IB_PMA_PORT_XMIT_PKTS cpu_to_be16(0x0003) +#define IB_PMA_PORT_RCV_PKTS cpu_to_be16(0x0004) +#define IB_PMA_PORT_XMIT_WAIT cpu_to_be16(0x0005) + +struct ib_reth { + __be64 vaddr; + __be32 rkey; + __be32 length; +} __attribute__ ((packed)); + +struct ib_atomic_eth { + __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ + __be32 rkey; + __be64 swap_data; + __be64 compare_data; +} __attribute__ ((packed)); + +struct ipath_other_headers { + __be32 bth[3]; + union { + struct { + __be32 deth[2]; + __be32 imm_data; + } ud; + struct { + struct ib_reth reth; + __be32 imm_data; + } rc; + struct { + __be32 aeth; + __be32 atomic_ack_eth[2]; + } at; + __be32 imm_data; + __be32 aeth; + struct ib_atomic_eth atomic_eth; + } u; +} __attribute__ ((packed)); + +/* + * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes + * long (72 w/ imm_data). Only the first 56 bytes of the IB header + * will be in the eager header buffer. The remaining 12 or 16 bytes + * are in the data buffer. + */ +struct ipath_ib_header { + __be16 lrh[4]; + union { + struct { + struct ib_grh grh; + struct ipath_other_headers oth; + } l; + struct ipath_other_headers oth; + } u; +} __attribute__ ((packed)); + +struct ipath_pio_header { + __le32 pbc[2]; + struct ipath_ib_header hdr; +} __attribute__ ((packed)); + +/* + * There is one struct ipath_mcast for each multicast GID. + * All attached QPs are then stored as a list of + * struct ipath_mcast_qp. + */ +struct ipath_mcast_qp { + struct list_head list; + struct ipath_qp *qp; +}; + +struct ipath_mcast { + struct rb_node rb_node; + union ib_gid mgid; + struct list_head qp_list; + wait_queue_head_t wait; + atomic_t refcount; + int n_attached; +}; + +/* Protection domain */ +struct ipath_pd { + struct ib_pd ibpd; + int user; /* non-zero if created from user space */ +}; + +/* Address Handle */ +struct ipath_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; +}; + +/* + * This structure is used by ipath_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. + */ +struct ipath_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + void *obj; + __u64 offset; + struct kref ref; + unsigned size; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and completion queue entries as a single memory allocation so + * it can be mmap'ed into user space. + */ +struct ipath_cq_wc { + u32 head; /* index of next entry to fill */ + u32 tail; /* index of next ib_poll_cq() entry */ + union { + /* these are actually size ibcq.cqe + 1 */ + struct ib_uverbs_wc uqueue[0]; + struct ib_wc kqueue[0]; + }; +}; + +/* + * The completion queue structure. + */ +struct ipath_cq { + struct ib_cq ibcq; + struct tasklet_struct comptask; + spinlock_t lock; + u8 notify; + u8 triggered; + struct ipath_cq_wc *queue; + struct ipath_mmap_info *ip; +}; + +/* + * A segment is a linear region of low physical memory. + * XXX Maybe we should use phys addr here and kmap()/kunmap(). + * Used by the verbs layer. + */ +struct ipath_seg { + void *vaddr; + size_t length; +}; + +/* The number of ipath_segs that fit in a page. */ +#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg)) + +struct ipath_segarray { + struct ipath_seg segs[IPATH_SEGSZ]; +}; + +struct ipath_mregion { + struct ib_pd *pd; /* shares refcnt of ibmr.pd */ + u64 user_base; /* User's address for this region */ + u64 iova; /* IB start address of this region */ + size_t length; + u32 lkey; + u32 offset; /* offset (bytes) to start of region */ + int access_flags; + u32 max_segs; /* number of ipath_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + struct ipath_segarray *map[0]; /* the segments */ +}; + +/* + * These keep track of the copy progress within a memory region. + * Used by the verbs layer. + */ +struct ipath_sge { + struct ipath_mregion *mr; + void *vaddr; /* kernel virtual address of segment */ + u32 sge_length; /* length of the SGE */ + u32 length; /* remaining length of the segment */ + u16 m; /* current index: mr->map[m] */ + u16 n; /* current index: mr->map[m]->segs[n] */ +}; + +/* Memory region */ +struct ipath_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + struct ipath_mregion mr; /* must be last */ +}; + +/* + * Send work request queue entry. + * The size of the sg_list is determined when the QP is created and stored + * in qp->s_max_sge. + */ +struct ipath_swqe { + struct ib_send_wr wr; /* don't use wr.sg_list */ + u32 psn; /* first packet sequence number */ + u32 lpsn; /* last packet sequence number */ + u32 ssn; /* send sequence number */ + u32 length; /* total length of data in sg_list */ + struct ipath_sge sg_list[0]; +}; + +/* + * Receive work request queue entry. + * The size of the sg_list is determined when the QP (or SRQ) is created + * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). + */ +struct ipath_rwqe { + u64 wr_id; + u8 num_sge; + struct ib_sge sg_list[0]; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and receive work queue entries as a single memory allocation so + * it can be mmap'ed into user space. + * Note that the wq array elements are variable size so you can't + * just index into the array to get the N'th element; + * use get_rwqe_ptr() instead. + */ +struct ipath_rwq { + u32 head; /* new work requests posted to the head */ + u32 tail; /* receives pull requests from here. */ + struct ipath_rwqe wq[0]; +}; + +struct ipath_rq { + struct ipath_rwq *wq; + spinlock_t lock; + u32 size; /* size of RWQE array */ + u8 max_sge; +}; + +struct ipath_srq { + struct ib_srq ibsrq; + struct ipath_rq rq; + struct ipath_mmap_info *ip; + /* send signal when number of RWQEs < limit */ + u32 limit; +}; + +struct ipath_sge_state { + struct ipath_sge *sg_list; /* next SGE to be used if any */ + struct ipath_sge sge; /* progress state for the current SGE */ + u8 num_sge; + u8 static_rate; +}; + +/* + * This structure holds the information that the send tasklet needs + * to send a RDMA read response or atomic operation. + */ +struct ipath_ack_entry { + u8 opcode; + u8 sent; + u32 psn; + union { + struct ipath_sge_state rdma_sge; + u64 atomic_data; + }; +}; + +/* + * Variables prefixed with s_ are for the requester (sender). + * Variables prefixed with r_ are for the responder (receiver). + * Variables prefixed with ack_ are for responder replies. + * + * Common variables are protected by both r_rq.lock and s_lock in that order + * which only happens in modify_qp() or changing the QP 'state'. + */ +struct ipath_qp { + struct ib_qp ibqp; + struct ipath_qp *next; /* link list for QPN hash table */ + struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */ + struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */ + struct list_head piowait; /* link for wait PIO buf */ + struct list_head timerwait; /* link for waiting for timeouts */ + struct ib_ah_attr remote_ah_attr; + struct ipath_ib_header s_hdr; /* next packet header to send */ + atomic_t refcount; + wait_queue_head_t wait; + wait_queue_head_t wait_dma; + struct tasklet_struct s_task; + struct ipath_mmap_info *ip; + struct ipath_sge_state *s_cur_sge; + struct ipath_verbs_txreq *s_tx; + struct ipath_sge_state s_sge; /* current send request data */ + struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1]; + struct ipath_sge_state s_ack_rdma_sge; + struct ipath_sge_state s_rdma_read_sge; + struct ipath_sge_state r_sge; /* current receive data */ + spinlock_t s_lock; + atomic_t s_dma_busy; + u16 s_pkt_delay; + u16 s_hdrwords; /* size of s_hdr in 32 bit words */ + u32 s_cur_size; /* size of send packet in bytes */ + u32 s_len; /* total length of s_sge */ + u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ + u32 s_next_psn; /* PSN for next request */ + u32 s_last_psn; /* last response PSN processed */ + u32 s_psn; /* current packet sequence number */ + u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ + u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ + u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ + u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ + u64 r_wr_id; /* ID for current receive WQE */ + unsigned long r_aflags; + u32 r_len; /* total length of r_sge */ + u32 r_rcv_len; /* receive data len processed */ + u32 r_psn; /* expected rcv packet sequence number */ + u32 r_msn; /* message sequence number */ + u8 state; /* QP state */ + u8 s_state; /* opcode of last packet sent */ + u8 s_ack_state; /* opcode of packet to ACK */ + u8 s_nak_state; /* non-zero if NAK is pending */ + u8 r_state; /* opcode of last packet received */ + u8 r_nak_state; /* non-zero if NAK is pending */ + u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ + u8 r_flags; + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 r_head_ack_queue; /* index into s_ack_queue[] */ + u8 qp_access_flags; + u8 s_max_sge; /* size of s_wq->sg_list */ + u8 s_retry_cnt; /* number of times to retry */ + u8 s_rnr_retry_cnt; + u8 s_retry; /* requester retry counter */ + u8 s_rnr_retry; /* requester RNR retry counter */ + u8 s_pkey_index; /* PKEY index to use */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ + u8 s_tail_ack_queue; /* index into s_ack_queue[] */ + u8 s_flags; + u8 s_dmult; + u8 s_draining; + u8 timeout; /* Timeout for this QP */ + enum ib_mtu path_mtu; + u32 remote_qpn; + u32 qkey; /* QKEY for this QP (for UD or RD) */ + u32 s_size; /* send work queue size */ + u32 s_head; /* new entries added here */ + u32 s_tail; /* next entry to process */ + u32 s_cur; /* current work queue entry */ + u32 s_last; /* last un-ACK'ed entry */ + u32 s_ssn; /* SSN of tail entry */ + u32 s_lsn; /* limit sequence number (credit) */ + struct ipath_swqe *s_wq; /* send work queue */ + struct ipath_swqe *s_wqe; + struct ipath_sge *r_ud_sg_list; + struct ipath_rq r_rq; /* receive work queue */ + struct ipath_sge r_sg_list[0]; /* verified SGEs */ +}; + +/* + * Atomic bit definitions for r_aflags. + */ +#define IPATH_R_WRID_VALID 0 + +/* + * Bit definitions for r_flags. + */ +#define IPATH_R_REUSE_SGE 0x01 +#define IPATH_R_RDMAR_SEQ 0x02 + +/* + * Bit definitions for s_flags. + * + * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs + * before processing the next SWQE + * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs + * before processing the next SWQE + * IPATH_S_WAITING - waiting for RNR timeout or send buffer available. + * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE + * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating + * next send completion entry not via send DMA. + */ +#define IPATH_S_SIGNAL_REQ_WR 0x01 +#define IPATH_S_FENCE_PENDING 0x02 +#define IPATH_S_RDMAR_PENDING 0x04 +#define IPATH_S_ACK_PENDING 0x08 +#define IPATH_S_BUSY 0x10 +#define IPATH_S_WAITING 0x20 +#define IPATH_S_WAIT_SSN_CREDIT 0x40 +#define IPATH_S_WAIT_DMA 0x80 + +#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \ + IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA) + +#define IPATH_PSN_CREDIT 512 + +/* + * Since struct ipath_swqe is not a fixed size, we can't simply index into + * struct ipath_qp.s_wq. This function does the array index computation. + */ +static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp, + unsigned n) +{ + return (struct ipath_swqe *)((char *)qp->s_wq + + (sizeof(struct ipath_swqe) + + qp->s_max_sge * + sizeof(struct ipath_sge)) * n); +} + +/* + * Since struct ipath_rwqe is not a fixed size, we can't simply index into + * struct ipath_rwq.wq. This function does the array index computation. + */ +static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq, + unsigned n) +{ + return (struct ipath_rwqe *) + ((char *) rq->wq->wq + + (sizeof(struct ipath_rwqe) + + rq->max_sge * sizeof(struct ib_sge)) * n); +} + +/* + * QPN-map pages start out as NULL, they get allocated upon + * first use and are never deallocated. This way, + * large bitmaps are not allocated unless large numbers of QPs are used. + */ +struct qpn_map { + atomic_t n_free; + void *page; +}; + +struct ipath_qp_table { + spinlock_t lock; + u32 last; /* last QP number allocated */ + u32 max; /* size of the hash table */ + u32 nmaps; /* size of the map table */ + struct ipath_qp **table; + /* bit map of free numbers */ + struct qpn_map map[QPNMAP_ENTRIES]; +}; + +struct ipath_lkey_table { + spinlock_t lock; + u32 next; /* next unused index (speeds search) */ + u32 gen; /* generation count */ + u32 max; /* size of the table */ + struct ipath_mregion **table; +}; + +struct ipath_opcode_stats { + u64 n_packets; /* number of packets */ + u64 n_bytes; /* total number of bytes */ +}; + +struct ipath_ibdev { + struct ib_device ibdev; + struct ipath_devdata *dd; + struct list_head pending_mmaps; + spinlock_t mmap_offset_lock; + u32 mmap_offset; + int ib_unit; /* This is the device number */ + u16 sm_lid; /* in host order */ + u8 sm_sl; + u8 mkeyprot; + /* non-zero when timer is set */ + unsigned long mkey_lease_timeout; + + /* The following fields are really per port. */ + struct ipath_qp_table qp_table; + struct ipath_lkey_table lk_table; + struct list_head pending[3]; /* FIFO of QPs waiting for ACKs */ + struct list_head piowait; /* list for wait PIO buf */ + struct list_head txreq_free; + void *txreq_bufs; + /* list of QPs waiting for RNR timer */ + struct list_head rnrwait; + spinlock_t pending_lock; + __be64 sys_image_guid; /* in network order */ + __be64 gid_prefix; /* in network order */ + __be64 mkey; + + u32 n_pds_allocated; /* number of PDs allocated for device */ + spinlock_t n_pds_lock; + u32 n_ahs_allocated; /* number of AHs allocated for device */ + spinlock_t n_ahs_lock; + u32 n_cqs_allocated; /* number of CQs allocated for device */ + spinlock_t n_cqs_lock; + u32 n_qps_allocated; /* number of QPs allocated for device */ + spinlock_t n_qps_lock; + u32 n_srqs_allocated; /* number of SRQs allocated for device */ + spinlock_t n_srqs_lock; + u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ + spinlock_t n_mcast_grps_lock; + + u64 ipath_sword; /* total dwords sent (sample result) */ + u64 ipath_rword; /* total dwords received (sample result) */ + u64 ipath_spkts; /* total packets sent (sample result) */ + u64 ipath_rpkts; /* total packets received (sample result) */ + /* # of ticks no data sent (sample result) */ + u64 ipath_xmit_wait; + u64 rcv_errors; /* # of packets with SW detected rcv errs */ + u64 n_unicast_xmit; /* total unicast packets sent */ + u64 n_unicast_rcv; /* total unicast packets received */ + u64 n_multicast_xmit; /* total multicast packets sent */ + u64 n_multicast_rcv; /* total multicast packets received */ + u64 z_symbol_error_counter; /* starting count for PMA */ + u64 z_link_error_recovery_counter; /* starting count for PMA */ + u64 z_link_downed_counter; /* starting count for PMA */ + u64 z_port_rcv_errors; /* starting count for PMA */ + u64 z_port_rcv_remphys_errors; /* starting count for PMA */ + u64 z_port_xmit_discards; /* starting count for PMA */ + u64 z_port_xmit_data; /* starting count for PMA */ + u64 z_port_rcv_data; /* starting count for PMA */ + u64 z_port_xmit_packets; /* starting count for PMA */ + u64 z_port_rcv_packets; /* starting count for PMA */ + u32 z_pkey_violations; /* starting count for PMA */ + u32 z_local_link_integrity_errors; /* starting count for PMA */ + u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */ + u32 z_vl15_dropped; /* starting count for PMA */ + u32 n_rc_resends; + u32 n_rc_acks; + u32 n_rc_qacks; + u32 n_seq_naks; + u32 n_rdma_seq; + u32 n_rnr_naks; + u32 n_other_naks; + u32 n_timeouts; + u32 n_pkt_drops; + u32 n_vl15_dropped; + u32 n_wqe_errs; + u32 n_rdma_dup_busy; + u32 n_piowait; + u32 n_unaligned; + u32 port_cap_flags; + u32 pma_sample_start; + u32 pma_sample_interval; + __be16 pma_counter_select[5]; + u16 pma_tag; + u16 qkey_violations; + u16 mkey_violations; + u16 mkey_lease_period; + u16 pending_index; /* which pending queue is active */ + u8 pma_sample_status; + u8 subnet_timeout; + u8 vl_high_limit; + struct ipath_opcode_stats opstats[128]; +}; + +struct ipath_verbs_counters { + u64 symbol_error_counter; + u64 link_error_recovery_counter; + u64 link_downed_counter; + u64 port_rcv_errors; + u64 port_rcv_remphys_errors; + u64 port_xmit_discards; + u64 port_xmit_data; + u64 port_rcv_data; + u64 port_xmit_packets; + u64 port_rcv_packets; + u32 local_link_integrity_errors; + u32 excessive_buffer_overrun_errors; + u32 vl15_dropped; +}; + +struct ipath_verbs_txreq { + struct ipath_qp *qp; + struct ipath_swqe *wqe; + u32 map_len; + u32 len; + struct ipath_sge_state *ss; + struct ipath_pio_header hdr; + struct ipath_sdma_txreq txreq; +}; + +static inline struct ipath_mr *to_imr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct ipath_mr, ibmr); +} + +static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct ipath_pd, ibpd); +} + +static inline struct ipath_ah *to_iah(struct ib_ah *ibah) +{ + return container_of(ibah, struct ipath_ah, ibah); +} + +static inline struct ipath_cq *to_icq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct ipath_cq, ibcq); +} + +static inline struct ipath_srq *to_isrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct ipath_srq, ibsrq); +} + +static inline struct ipath_qp *to_iqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct ipath_qp, ibqp); +} + +static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct ipath_ibdev, ibdev); +} + +/* + * This must be called with s_lock held. + */ +static inline void ipath_schedule_send(struct ipath_qp *qp) +{ + if (qp->s_flags & IPATH_S_ANY_WAIT) + qp->s_flags &= ~IPATH_S_ANY_WAIT; + if (!(qp->s_flags & IPATH_S_BUSY)) + tasklet_hi_schedule(&qp->s_task); +} + +int ipath_process_mad(struct ib_device *ibdev, + int mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad); + +/* + * Compare the lower 24 bits of the two values. + * Returns an integer <, ==, or > than zero. + */ +static inline int ipath_cmp24(u32 a, u32 b) +{ + return (((int) a) - ((int) b)) << 8; +} + +struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid); + +int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords, + u64 *rwords, u64 *spkts, u64 *rpkts, + u64 *xmit_wait); + +int ipath_get_counters(struct ipath_devdata *dd, + struct ipath_verbs_counters *cntrs); + +int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); + +int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); + +int ipath_mcast_tree_empty(void); + +__be32 ipath_compute_aeth(struct ipath_qp *qp); + +struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn); + +struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); + +int ipath_destroy_qp(struct ib_qp *ibqp); + +int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err); + +int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + +int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr); + +unsigned ipath_free_all_qps(struct ipath_qp_table *qpt); + +int ipath_init_qp_table(struct ipath_ibdev *idev, int size); + +void ipath_get_credit(struct ipath_qp *qp, u32 aeth); + +unsigned ipath_ib_rate_to_mult(enum ib_rate rate); + +int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, + u32 hdrwords, struct ipath_sge_state *ss, u32 len); + +void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length); + +void ipath_skip_sge(struct ipath_sge_state *ss, u32 length); + +void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct ipath_qp *qp); + +void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct ipath_qp *qp); + +void ipath_restart_rc(struct ipath_qp *qp, u32 psn); + +void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err); + +int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr); + +void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct ipath_qp *qp); + +int ipath_alloc_lkey(struct ipath_lkey_table *rkt, + struct ipath_mregion *mr); + +void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey); + +int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, + struct ib_sge *sge, int acc); + +int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, + u32 len, u64 vaddr, u32 rkey, int acc); + +int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); + +struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); + +int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, + struct ib_udata *udata); + +int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); + +int ipath_destroy_srq(struct ib_srq *ibsrq); + +void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); + +int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); + +struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector, + struct ib_ucontext *context, + struct ib_udata *udata); + +int ipath_destroy_cq(struct ib_cq *ibcq); + +int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); + +int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); + +struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc); + +struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, int acc, u64 *iova_start); + +struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, + struct ib_udata *udata); + +int ipath_dereg_mr(struct ib_mr *ibmr); + +struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + +int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list, + int list_len, u64 iova); + +int ipath_unmap_fmr(struct list_head *fmr_list); + +int ipath_dealloc_fmr(struct ib_fmr *ibfmr); + +void ipath_release_mmap_info(struct kref *ref); + +struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev, + u32 size, + struct ib_ucontext *context, + void *obj); + +void ipath_update_mmap_info(struct ipath_ibdev *dev, + struct ipath_mmap_info *ip, + u32 size, void *obj); + +int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + +void ipath_insert_rnr_queue(struct ipath_qp *qp); + +int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, + u32 *lengthp, struct ipath_sge_state *ss); + +int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only); + +u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr, + struct ib_global_route *grh, u32 hwords, u32 nwords); + +void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp, + struct ipath_other_headers *ohdr, + u32 bth0, u32 bth2); + +void ipath_do_send(unsigned long data); + +void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, + enum ib_wc_status status); + +int ipath_make_rc_req(struct ipath_qp *qp); + +int ipath_make_uc_req(struct ipath_qp *qp); + +int ipath_make_ud_req(struct ipath_qp *qp); + +int ipath_register_ib_device(struct ipath_devdata *); + +void ipath_unregister_ib_device(struct ipath_ibdev *); + +void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32); + +int ipath_ib_piobufavail(struct ipath_ibdev *); + +unsigned ipath_get_npkeys(struct ipath_devdata *); + +u32 ipath_get_cr_errpkey(struct ipath_devdata *); + +unsigned ipath_get_pkey(struct ipath_devdata *, unsigned); + +extern const enum ib_wc_opcode ib_ipath_wc_opcode[]; + +/* + * Below converts HCA-specific LinkTrainingState to IB PhysPortState + * values. + */ +extern const u8 ipath_cvt_physportstate[]; +#define IB_PHYSPORTSTATE_SLEEP 1 +#define IB_PHYSPORTSTATE_POLL 2 +#define IB_PHYSPORTSTATE_DISABLED 3 +#define IB_PHYSPORTSTATE_CFG_TRAIN 4 +#define IB_PHYSPORTSTATE_LINKUP 5 +#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6 + +extern const int ib_ipath_state_ops[]; + +extern unsigned int ib_ipath_lkey_table_size; + +extern unsigned int ib_ipath_max_cqes; + +extern unsigned int ib_ipath_max_cqs; + +extern unsigned int ib_ipath_max_qp_wrs; + +extern unsigned int ib_ipath_max_qps; + +extern unsigned int ib_ipath_max_sges; + +extern unsigned int ib_ipath_max_mcast_grps; + +extern unsigned int ib_ipath_max_mcast_qp_attached; + +extern unsigned int ib_ipath_max_srqs; + +extern unsigned int ib_ipath_max_srq_sges; + +extern unsigned int ib_ipath_max_srq_wrs; + +extern const u32 ib_ipath_rnr_table[]; + +extern struct ib_dma_mapping_ops ipath_dma_mapping_ops; + +#endif /* IPATH_VERBS_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c new file mode 100644 index 00000000..6216ea92 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "ipath_verbs.h" + +/* + * Global table of GID to attached QPs. + * The table is global to all ipath devices since a send from one QP/device + * needs to be locally routed to any locally attached QPs on the same + * or different device. + */ +static struct rb_root mcast_tree; +static DEFINE_SPINLOCK(mcast_lock); + +/** + * ipath_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct + * @qp: the QP to link + */ +static struct ipath_mcast_qp *ipath_mcast_qp_alloc(struct ipath_qp *qp) +{ + struct ipath_mcast_qp *mqp; + + mqp = kmalloc(sizeof *mqp, GFP_KERNEL); + if (!mqp) + goto bail; + + mqp->qp = qp; + atomic_inc(&qp->refcount); + +bail: + return mqp; +} + +static void ipath_mcast_qp_free(struct ipath_mcast_qp *mqp) +{ + struct ipath_qp *qp = mqp->qp; + + /* Notify ipath_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + + kfree(mqp); +} + +/** + * ipath_mcast_alloc - allocate the multicast GID structure + * @mgid: the multicast GID + * + * A list of QPs will be attached to this structure. + */ +static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid) +{ + struct ipath_mcast *mcast; + + mcast = kmalloc(sizeof *mcast, GFP_KERNEL); + if (!mcast) + goto bail; + + mcast->mgid = *mgid; + INIT_LIST_HEAD(&mcast->qp_list); + init_waitqueue_head(&mcast->wait); + atomic_set(&mcast->refcount, 0); + mcast->n_attached = 0; + +bail: + return mcast; +} + +static void ipath_mcast_free(struct ipath_mcast *mcast) +{ + struct ipath_mcast_qp *p, *tmp; + + list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) + ipath_mcast_qp_free(p); + + kfree(mcast); +} + +/** + * ipath_mcast_find - search the global table for the given multicast GID + * @mgid: the multicast GID to search for + * + * Returns NULL if not found. + * + * The caller is responsible for decrementing the reference count if found. + */ +struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid) +{ + struct rb_node *n; + unsigned long flags; + struct ipath_mcast *mcast; + + spin_lock_irqsave(&mcast_lock, flags); + n = mcast_tree.rb_node; + while (n) { + int ret; + + mcast = rb_entry(n, struct ipath_mcast, rb_node); + + ret = memcmp(mgid->raw, mcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) + n = n->rb_left; + else if (ret > 0) + n = n->rb_right; + else { + atomic_inc(&mcast->refcount); + spin_unlock_irqrestore(&mcast_lock, flags); + goto bail; + } + } + spin_unlock_irqrestore(&mcast_lock, flags); + + mcast = NULL; + +bail: + return mcast; +} + +/** + * ipath_mcast_add - insert mcast GID into table and attach QP struct + * @mcast: the mcast GID table + * @mqp: the QP to attach + * + * Return zero if both were added. Return EEXIST if the GID was already in + * the table but the QP was added. Return ESRCH if the QP was already + * attached and neither structure was added. + */ +static int ipath_mcast_add(struct ipath_ibdev *dev, + struct ipath_mcast *mcast, + struct ipath_mcast_qp *mqp) +{ + struct rb_node **n = &mcast_tree.rb_node; + struct rb_node *pn = NULL; + int ret; + + spin_lock_irq(&mcast_lock); + + while (*n) { + struct ipath_mcast *tmcast; + struct ipath_mcast_qp *p; + + pn = *n; + tmcast = rb_entry(pn, struct ipath_mcast, rb_node); + + ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) { + n = &pn->rb_left; + continue; + } + if (ret > 0) { + n = &pn->rb_right; + continue; + } + + /* Search the QP list to see if this is already there. */ + list_for_each_entry_rcu(p, &tmcast->qp_list, list) { + if (p->qp == mqp->qp) { + ret = ESRCH; + goto bail; + } + } + if (tmcast->n_attached == ib_ipath_max_mcast_qp_attached) { + ret = ENOMEM; + goto bail; + } + + tmcast->n_attached++; + + list_add_tail_rcu(&mqp->list, &tmcast->qp_list); + ret = EEXIST; + goto bail; + } + + spin_lock(&dev->n_mcast_grps_lock); + if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) { + spin_unlock(&dev->n_mcast_grps_lock); + ret = ENOMEM; + goto bail; + } + + dev->n_mcast_grps_allocated++; + spin_unlock(&dev->n_mcast_grps_lock); + + mcast->n_attached++; + + list_add_tail_rcu(&mqp->list, &mcast->qp_list); + + atomic_inc(&mcast->refcount); + rb_link_node(&mcast->rb_node, pn, n); + rb_insert_color(&mcast->rb_node, &mcast_tree); + + ret = 0; + +bail: + spin_unlock_irq(&mcast_lock); + + return ret; +} + +int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct ipath_qp *qp = to_iqp(ibqp); + struct ipath_ibdev *dev = to_idev(ibqp->device); + struct ipath_mcast *mcast; + struct ipath_mcast_qp *mqp; + int ret; + + /* + * Allocate data structures since its better to do this outside of + * spin locks and it will most likely be needed. + */ + mcast = ipath_mcast_alloc(gid); + if (mcast == NULL) { + ret = -ENOMEM; + goto bail; + } + mqp = ipath_mcast_qp_alloc(qp); + if (mqp == NULL) { + ipath_mcast_free(mcast); + ret = -ENOMEM; + goto bail; + } + switch (ipath_mcast_add(dev, mcast, mqp)) { + case ESRCH: + /* Neither was used: can't attach the same QP twice. */ + ipath_mcast_qp_free(mqp); + ipath_mcast_free(mcast); + ret = -EINVAL; + goto bail; + case EEXIST: /* The mcast wasn't used */ + ipath_mcast_free(mcast); + break; + case ENOMEM: + /* Exceeded the maximum number of mcast groups. */ + ipath_mcast_qp_free(mqp); + ipath_mcast_free(mcast); + ret = -ENOMEM; + goto bail; + default: + break; + } + + ret = 0; + +bail: + return ret; +} + +int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct ipath_qp *qp = to_iqp(ibqp); + struct ipath_ibdev *dev = to_idev(ibqp->device); + struct ipath_mcast *mcast = NULL; + struct ipath_mcast_qp *p, *tmp; + struct rb_node *n; + int last = 0; + int ret; + + spin_lock_irq(&mcast_lock); + + /* Find the GID in the mcast table. */ + n = mcast_tree.rb_node; + while (1) { + if (n == NULL) { + spin_unlock_irq(&mcast_lock); + ret = -EINVAL; + goto bail; + } + + mcast = rb_entry(n, struct ipath_mcast, rb_node); + ret = memcmp(gid->raw, mcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) + n = n->rb_left; + else if (ret > 0) + n = n->rb_right; + else + break; + } + + /* Search the QP list. */ + list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) { + if (p->qp != qp) + continue; + /* + * We found it, so remove it, but don't poison the forward + * link until we are sure there are no list walkers. + */ + list_del_rcu(&p->list); + mcast->n_attached--; + + /* If this was the last attached QP, remove the GID too. */ + if (list_empty(&mcast->qp_list)) { + rb_erase(&mcast->rb_node, &mcast_tree); + last = 1; + } + break; + } + + spin_unlock_irq(&mcast_lock); + + if (p) { + /* + * Wait for any list walkers to finish before freeing the + * list element. + */ + wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); + ipath_mcast_qp_free(p); + } + if (last) { + atomic_dec(&mcast->refcount); + wait_event(mcast->wait, !atomic_read(&mcast->refcount)); + ipath_mcast_free(mcast); + spin_lock_irq(&dev->n_mcast_grps_lock); + dev->n_mcast_grps_allocated--; + spin_unlock_irq(&dev->n_mcast_grps_lock); + } + + ret = 0; + +bail: + return ret; +} + +int ipath_mcast_tree_empty(void) +{ + return mcast_tree.rb_node == NULL; +} diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c new file mode 100644 index 00000000..1d7bd82a --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This file is conditionally built on PowerPC only. Otherwise weak symbol + * versions of the functions exported from here are used. + */ + +#include "ipath_kernel.h" + +/** + * ipath_enable_wc - enable write combining for MMIO writes to the device + * @dd: infinipath device + * + * Nothing to do on PowerPC, so just return without error. + */ +int ipath_enable_wc(struct ipath_devdata *dd) +{ + return 0; +} + +/** + * ipath_unordered_wc - indicate whether write combining is unordered + * + * Because our performance depends on our ability to do write + * combining mmio writes in the most efficient way, we need to + * know if we are on a processor that may reorder stores when + * write combining. + */ +int ipath_unordered_wc(void) +{ + return 1; +} diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c new file mode 100644 index 00000000..3428acb0 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This file is conditionally built on x86_64 only. Otherwise weak symbol + * versions of the functions exported from here are used. + */ + +#include +#include +#include + +#include "ipath_kernel.h" + +/** + * ipath_enable_wc - enable write combining for MMIO writes to the device + * @dd: infinipath device + * + * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable + * write combining. + */ +int ipath_enable_wc(struct ipath_devdata *dd) +{ + int ret = 0; + u64 pioaddr, piolen; + unsigned bits; + const unsigned long addr = pci_resource_start(dd->pcidev, 0); + const size_t len = pci_resource_len(dd->pcidev, 0); + + /* + * Set the PIO buffers to be WCCOMB, so we get HT bursts to the + * chip. Linux (possibly the hardware) requires it to be on a power + * of 2 address matching the length (which has to be a power of 2). + * For rev1, that means the base address, for rev2, it will be just + * the PIO buffers themselves. + * For chips with two sets of buffers, the calculations are + * somewhat more complicated; we need to sum, and the piobufbase + * register has both offsets, 2K in low 32 bits, 4K in high 32 bits. + * The buffers are still packed, so a single range covers both. + */ + if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */ + unsigned long pio2kbase, pio4kbase; + pio2kbase = dd->ipath_piobufbase & 0xffffffffUL; + pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL; + if (pio2kbase < pio4kbase) { /* all, for now */ + pioaddr = addr + pio2kbase; + piolen = pio4kbase - pio2kbase + + dd->ipath_piobcnt4k * dd->ipath_4kalign; + } else { + pioaddr = addr + pio4kbase; + piolen = pio2kbase - pio4kbase + + dd->ipath_piobcnt2k * dd->ipath_palign; + } + } else { /* single buffer size (2K, currently) */ + pioaddr = addr + dd->ipath_piobufbase; + piolen = dd->ipath_piobcnt2k * dd->ipath_palign + + dd->ipath_piobcnt4k * dd->ipath_4kalign; + } + + for (bits = 0; !(piolen & (1ULL << bits)); bits++) + /* do nothing */ ; + + if (piolen != (1ULL << bits)) { + piolen >>= bits; + while (piolen >>= 1) + bits++; + piolen = 1ULL << (bits + 1); + } + if (pioaddr & (piolen - 1)) { + u64 atmp; + ipath_dbg("pioaddr %llx not on right boundary for size " + "%llx, fixing\n", + (unsigned long long) pioaddr, + (unsigned long long) piolen); + atmp = pioaddr & ~(piolen - 1); + if (atmp < addr || (atmp + piolen) > (addr + len)) { + ipath_dev_err(dd, "No way to align address/size " + "(%llx/%llx), no WC mtrr\n", + (unsigned long long) atmp, + (unsigned long long) piolen << 1); + ret = -ENODEV; + } else { + ipath_dbg("changing WC base from %llx to %llx, " + "len from %llx to %llx\n", + (unsigned long long) pioaddr, + (unsigned long long) atmp, + (unsigned long long) piolen, + (unsigned long long) piolen << 1); + pioaddr = atmp; + piolen <<= 1; + } + } + + if (!ret) { + int cookie; + ipath_cdbg(VERBOSE, "Setting mtrr for chip to WC " + "(addr %llx, len=0x%llx)\n", + (unsigned long long) pioaddr, + (unsigned long long) piolen); + cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0); + if (cookie < 0) { + { + dev_info(&dd->pcidev->dev, + "mtrr_add() WC for PIO bufs " + "failed (%d)\n", + cookie); + ret = -EINVAL; + } + } else { + ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, " + "cookie is %d\n", cookie); + dd->ipath_wc_cookie = cookie; + dd->ipath_wc_base = (unsigned long) pioaddr; + dd->ipath_wc_len = (unsigned long) piolen; + } + } + + return ret; +} + +/** + * ipath_disable_wc - disable write combining for MMIO writes to the device + * @dd: infinipath device + */ +void ipath_disable_wc(struct ipath_devdata *dd) +{ + if (dd->ipath_wc_cookie) { + int r; + ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n"); + r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base, + dd->ipath_wc_len); + if (r < 0) + dev_info(&dd->pcidev->dev, + "mtrr_del(%lx, %lx, %lx) failed: %d\n", + dd->ipath_wc_cookie, dd->ipath_wc_base, + dd->ipath_wc_len, r); + dd->ipath_wc_cookie = 0; /* even on failure */ + } +} + +/** + * ipath_unordered_wc - indicate whether write combining is ordered + * + * Because our performance depends on our ability to do write combining mmio + * writes in the most efficient way, we need to know if we are on an Intel + * or AMD x86_64 processor. AMD x86_64 processors flush WC buffers out in + * the order completed, and so no special flushing is required to get + * correct ordering. Intel processors, however, will flush write buffers + * out in "random" orders, and so explicit ordering is needed at times. + */ +int ipath_unordered_wc(void) +{ + return boot_cpu_data.x86_vendor != X86_VENDOR_AMD; +} diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig new file mode 100644 index 00000000..24ab11a9 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/Kconfig @@ -0,0 +1,10 @@ +config MLX4_INFINIBAND + tristate "Mellanox ConnectX HCA support" + depends on NETDEVICES && ETHERNET && PCI + select NET_VENDOR_MELLANOX + select MLX4_CORE + ---help--- + This driver provides low-level InfiniBand support for + Mellanox ConnectX PCI Express host channel adapters (HCAs). + This is required to use InfiniBand protocols such as + IP-over-IB or SRP with these devices. diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile new file mode 100644 index 00000000..70f09c78 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o + +mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c new file mode 100644 index 00000000..a251becd --- /dev/null +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include +#include +#include + +#include "mlx4_ib.h" + +int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, + u8 *mac, int *is_mcast, u8 port) +{ + struct in6_addr in6; + + *is_mcast = 0; + + memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6); + if (rdma_link_local_addr(&in6)) + rdma_get_ll_mac(&in6, mac); + else if (rdma_is_multicast_addr(&in6)) { + rdma_get_mcast_mac(&in6, mac); + *is_mcast = 1; + } else + return -EINVAL; + + return 0; +} + +static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct mlx4_ib_ah *ah) +{ + struct mlx4_dev *dev = to_mdev(pd->device)->dev; + + ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); + ah->av.ib.g_slid = ah_attr->src_path_bits; + if (ah_attr->ah_flags & IB_AH_GRH) { + ah->av.ib.g_slid |= 0x80; + ah->av.ib.gid_index = ah_attr->grh.sgid_index; + ah->av.ib.hop_limit = ah_attr->grh.hop_limit; + ah->av.ib.sl_tclass_flowlabel |= + cpu_to_be32((ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label); + memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16); + } + + ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid); + if (ah_attr->static_rate) { + ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; + while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support)) + --ah->av.ib.stat_rate; + } + ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + + return &ah->ibah; +} + +static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct mlx4_ib_ah *ah) +{ + struct mlx4_ib_dev *ibdev = to_mdev(pd->device); + struct mlx4_dev *dev = ibdev->dev; + union ib_gid sgid; + u8 mac[6]; + int err; + int is_mcast; + u16 vlan_tag; + + err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num); + if (err) + return ERR_PTR(err); + + memcpy(ah->av.eth.mac, mac, 6); + err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid); + if (err) + return ERR_PTR(err); + vlan_tag = rdma_get_vlan_id(&sgid); + if (vlan_tag < 0x1000) + vlan_tag |= (ah_attr->sl & 7) << 13; + ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); + ah->av.eth.gid_index = ah_attr->grh.sgid_index; + ah->av.eth.vlan = cpu_to_be16(vlan_tag); + if (ah_attr->static_rate) { + ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; + while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support)) + --ah->av.eth.stat_rate; + } + + /* + * HW requires multicast LID so we just choose one. + */ + if (is_mcast) + ah->av.ib.dlid = cpu_to_be16(0xc000); + + memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16); + ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29); + + return &ah->ibah; +} + +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + struct mlx4_ib_ah *ah; + struct ib_ah *ret; + + ah = kzalloc(sizeof *ah, GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) { + if (!(ah_attr->ah_flags & IB_AH_GRH)) { + ret = ERR_PTR(-EINVAL); + } else { + /* + * TBD: need to handle the case when we get + * called in an atomic context and there we + * might sleep. We don't expect this + * currently since we're working with link + * local addresses which we can translate + * without going to sleep. + */ + ret = create_iboe_ah(pd, ah_attr, ah); + } + + if (IS_ERR(ret)) + kfree(ah); + + return ret; + } else + return create_ib_ah(pd, ah_attr, ah); /* never fails */ +} + +int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) +{ + struct mlx4_ib_ah *ah = to_mah(ibah); + enum rdma_link_layer ll; + + memset(ah_attr, 0, sizeof *ah_attr); + ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; + ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24; + ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num); + ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0; + if (ah->av.ib.stat_rate) + ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET; + ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F; + + if (mlx4_ib_ah_grh_present(ah)) { + ah_attr->ah_flags = IB_AH_GRH; + + ah_attr->grh.traffic_class = + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20; + ah_attr->grh.flow_label = + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff; + ah_attr->grh.hop_limit = ah->av.ib.hop_limit; + ah_attr->grh.sgid_index = ah->av.ib.gid_index; + memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16); + } + + return 0; +} + +int mlx4_ib_destroy_ah(struct ib_ah *ah) +{ + kfree(to_mah(ah)); + return 0; +} diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c new file mode 100644 index 00000000..77c8cb4c --- /dev/null +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -0,0 +1,825 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "mlx4_ib.h" +#include "user.h" + +static void mlx4_ib_cq_comp(struct mlx4_cq *cq) +{ + struct ib_cq *ibcq = &to_mibcq(cq)->ibcq; + ibcq->comp_handler(ibcq, ibcq->cq_context); +} + +static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type) +{ + struct ib_event event; + struct ib_cq *ibcq; + + if (type != MLX4_EVENT_TYPE_CQ_ERROR) { + printk(KERN_WARNING "mlx4_ib: Unexpected event type %d " + "on CQ %06x\n", type, cq->cqn); + return; + } + + ibcq = &to_mibcq(cq)->ibcq; + if (ibcq->event_handler) { + event.device = ibcq->device; + event.event = IB_EVENT_CQ_ERR; + event.element.cq = ibcq; + ibcq->event_handler(&event, ibcq->cq_context); + } +} + +static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n) +{ + return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe)); +} + +static void *get_cqe(struct mlx4_ib_cq *cq, int n) +{ + return get_cqe_from_buf(&cq->buf, n); +} + +static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n) +{ + struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe); + + return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ + !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe; +} + +static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq) +{ + return get_sw_cqe(cq, cq->mcq.cons_index); +} + +int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +{ + struct mlx4_ib_cq *mcq = to_mcq(cq); + struct mlx4_ib_dev *dev = to_mdev(cq->device); + + return mlx4_cq_modify(dev->dev, &mcq->mcq, cq_count, cq_period); +} + +static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int nent) +{ + int err; + + err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe), + PAGE_SIZE * 2, &buf->buf); + + if (err) + goto out; + + err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift, + &buf->mtt); + if (err) + goto err_buf; + + err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf); + if (err) + goto err_mtt; + + return 0; + +err_mtt: + mlx4_mtt_cleanup(dev->dev, &buf->mtt); + +err_buf: + mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe), + &buf->buf); + +out: + return err; +} + +static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe) +{ + mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf); +} + +static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context, + struct mlx4_ib_cq_buf *buf, struct ib_umem **umem, + u64 buf_addr, int cqe) +{ + int err; + + *umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe), + IB_ACCESS_LOCAL_WRITE, 1); + if (IS_ERR(*umem)) + return PTR_ERR(*umem); + + err = mlx4_mtt_init(dev->dev, ib_umem_page_count(*umem), + ilog2((*umem)->page_size), &buf->mtt); + if (err) + goto err_buf; + + err = mlx4_ib_umem_write_mtt(dev, &buf->mtt, *umem); + if (err) + goto err_mtt; + + return 0; + +err_mtt: + mlx4_mtt_cleanup(dev->dev, &buf->mtt); + +err_buf: + ib_umem_release(*umem); + + return err; +} + +struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct mlx4_ib_cq *cq; + struct mlx4_uar *uar; + int err; + + if (entries < 1 || entries > dev->dev->caps.max_cqes) + return ERR_PTR(-EINVAL); + + cq = kmalloc(sizeof *cq, GFP_KERNEL); + if (!cq) + return ERR_PTR(-ENOMEM); + + entries = roundup_pow_of_two(entries + 1); + cq->ibcq.cqe = entries - 1; + mutex_init(&cq->resize_mutex); + spin_lock_init(&cq->lock); + cq->resize_buf = NULL; + cq->resize_umem = NULL; + + if (context) { + struct mlx4_ib_create_cq ucmd; + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { + err = -EFAULT; + goto err_cq; + } + + err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem, + ucmd.buf_addr, entries); + if (err) + goto err_cq; + + err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr, + &cq->db); + if (err) + goto err_mtt; + + uar = &to_mucontext(context)->uar; + } else { + err = mlx4_db_alloc(dev->dev, &cq->db, 1); + if (err) + goto err_cq; + + cq->mcq.set_ci_db = cq->db.db; + cq->mcq.arm_db = cq->db.db + 1; + *cq->mcq.set_ci_db = 0; + *cq->mcq.arm_db = 0; + + err = mlx4_ib_alloc_cq_buf(dev, &cq->buf, entries); + if (err) + goto err_db; + + uar = &dev->priv_uar; + } + + err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, + cq->db.dma, &cq->mcq, vector, 0); + if (err) + goto err_dbmap; + + cq->mcq.comp = mlx4_ib_cq_comp; + cq->mcq.event = mlx4_ib_cq_event; + + if (context) + if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) { + err = -EFAULT; + goto err_dbmap; + } + + return &cq->ibcq; + +err_dbmap: + if (context) + mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db); + +err_mtt: + mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt); + + if (context) + ib_umem_release(cq->umem); + else + mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); + +err_db: + if (!context) + mlx4_db_free(dev->dev, &cq->db); + +err_cq: + kfree(cq); + + return ERR_PTR(err); +} + +static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq, + int entries) +{ + int err; + + if (cq->resize_buf) + return -EBUSY; + + cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); + if (!cq->resize_buf) + return -ENOMEM; + + err = mlx4_ib_alloc_cq_buf(dev, &cq->resize_buf->buf, entries); + if (err) { + kfree(cq->resize_buf); + cq->resize_buf = NULL; + return err; + } + + cq->resize_buf->cqe = entries - 1; + + return 0; +} + +static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq, + int entries, struct ib_udata *udata) +{ + struct mlx4_ib_resize_cq ucmd; + int err; + + if (cq->resize_umem) + return -EBUSY; + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return -EFAULT; + + cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); + if (!cq->resize_buf) + return -ENOMEM; + + err = mlx4_ib_get_cq_umem(dev, cq->umem->context, &cq->resize_buf->buf, + &cq->resize_umem, ucmd.buf_addr, entries); + if (err) { + kfree(cq->resize_buf); + cq->resize_buf = NULL; + return err; + } + + cq->resize_buf->cqe = entries - 1; + + return 0; +} + +static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq) +{ + u32 i; + + i = cq->mcq.cons_index; + while (get_sw_cqe(cq, i & cq->ibcq.cqe)) + ++i; + + return i - cq->mcq.cons_index; +} + +static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq) +{ + struct mlx4_cqe *cqe, *new_cqe; + int i; + + i = cq->mcq.cons_index; + cqe = get_cqe(cq, i & cq->ibcq.cqe); + while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) { + new_cqe = get_cqe_from_buf(&cq->resize_buf->buf, + (i + 1) & cq->resize_buf->cqe); + memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe)); + new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) | + (((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0); + cqe = get_cqe(cq, ++i & cq->ibcq.cqe); + } + ++cq->mcq.cons_index; +} + +int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(ibcq->device); + struct mlx4_ib_cq *cq = to_mcq(ibcq); + struct mlx4_mtt mtt; + int outst_cqe; + int err; + + mutex_lock(&cq->resize_mutex); + + if (entries < 1 || entries > dev->dev->caps.max_cqes) { + err = -EINVAL; + goto out; + } + + entries = roundup_pow_of_two(entries + 1); + if (entries == ibcq->cqe + 1) { + err = 0; + goto out; + } + + if (ibcq->uobject) { + err = mlx4_alloc_resize_umem(dev, cq, entries, udata); + if (err) + goto out; + } else { + /* Can't be smaller than the number of outstanding CQEs */ + outst_cqe = mlx4_ib_get_outstanding_cqes(cq); + if (entries < outst_cqe + 1) { + err = 0; + goto out; + } + + err = mlx4_alloc_resize_buf(dev, cq, entries); + if (err) + goto out; + } + + mtt = cq->buf.mtt; + + err = mlx4_cq_resize(dev->dev, &cq->mcq, entries, &cq->resize_buf->buf.mtt); + if (err) + goto err_buf; + + mlx4_mtt_cleanup(dev->dev, &mtt); + if (ibcq->uobject) { + cq->buf = cq->resize_buf->buf; + cq->ibcq.cqe = cq->resize_buf->cqe; + ib_umem_release(cq->umem); + cq->umem = cq->resize_umem; + + kfree(cq->resize_buf); + cq->resize_buf = NULL; + cq->resize_umem = NULL; + } else { + struct mlx4_ib_cq_buf tmp_buf; + int tmp_cqe = 0; + + spin_lock_irq(&cq->lock); + if (cq->resize_buf) { + mlx4_ib_cq_resize_copy_cqes(cq); + tmp_buf = cq->buf; + tmp_cqe = cq->ibcq.cqe; + cq->buf = cq->resize_buf->buf; + cq->ibcq.cqe = cq->resize_buf->cqe; + + kfree(cq->resize_buf); + cq->resize_buf = NULL; + } + spin_unlock_irq(&cq->lock); + + if (tmp_cqe) + mlx4_ib_free_cq_buf(dev, &tmp_buf, tmp_cqe); + } + + goto out; + +err_buf: + mlx4_mtt_cleanup(dev->dev, &cq->resize_buf->buf.mtt); + if (!ibcq->uobject) + mlx4_ib_free_cq_buf(dev, &cq->resize_buf->buf, + cq->resize_buf->cqe); + + kfree(cq->resize_buf); + cq->resize_buf = NULL; + + if (cq->resize_umem) { + ib_umem_release(cq->resize_umem); + cq->resize_umem = NULL; + } + +out: + mutex_unlock(&cq->resize_mutex); + return err; +} + +int mlx4_ib_destroy_cq(struct ib_cq *cq) +{ + struct mlx4_ib_dev *dev = to_mdev(cq->device); + struct mlx4_ib_cq *mcq = to_mcq(cq); + + mlx4_cq_free(dev->dev, &mcq->mcq); + mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt); + + if (cq->uobject) { + mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db); + ib_umem_release(mcq->umem); + } else { + mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe); + mlx4_db_free(dev->dev, &mcq->db); + } + + kfree(mcq); + + return 0; +} + +static void dump_cqe(void *cqe) +{ + __be32 *buf = cqe; + + printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n", + be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]), + be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]), + be32_to_cpu(buf[6]), be32_to_cpu(buf[7])); +} + +static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe, + struct ib_wc *wc) +{ + if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) { + printk(KERN_DEBUG "local QP operation err " + "(QPN %06x, WQE index %x, vendor syndrome %02x, " + "opcode = %02x)\n", + be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index), + cqe->vendor_err_syndrome, + cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK); + dump_cqe(cqe); + } + + switch (cqe->syndrome) { + case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR: + wc->status = IB_WC_LOC_LEN_ERR; + break; + case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR: + wc->status = IB_WC_LOC_QP_OP_ERR; + break; + case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR: + wc->status = IB_WC_LOC_PROT_ERR; + break; + case MLX4_CQE_SYNDROME_WR_FLUSH_ERR: + wc->status = IB_WC_WR_FLUSH_ERR; + break; + case MLX4_CQE_SYNDROME_MW_BIND_ERR: + wc->status = IB_WC_MW_BIND_ERR; + break; + case MLX4_CQE_SYNDROME_BAD_RESP_ERR: + wc->status = IB_WC_BAD_RESP_ERR; + break; + case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: + wc->status = IB_WC_REM_INV_REQ_ERR; + break; + case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR: + wc->status = IB_WC_REM_ACCESS_ERR; + break; + case MLX4_CQE_SYNDROME_REMOTE_OP_ERR: + wc->status = IB_WC_REM_OP_ERR; + break; + case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: + wc->status = IB_WC_RETRY_EXC_ERR; + break; + case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR: + wc->status = IB_WC_RNR_RETRY_EXC_ERR; + break; + case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR: + wc->status = IB_WC_REM_ABORT_ERR; + break; + default: + wc->status = IB_WC_GENERAL_ERR; + break; + } + + wc->vendor_err = cqe->vendor_err_syndrome; +} + +static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) +{ + return ((status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPV4F | + MLX4_CQE_STATUS_IPV4OPT | + MLX4_CQE_STATUS_IPV6 | + MLX4_CQE_STATUS_IPOK)) == + cpu_to_be16(MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPOK)) && + (status & cpu_to_be16(MLX4_CQE_STATUS_UDP | + MLX4_CQE_STATUS_TCP)) && + checksum == cpu_to_be16(0xffff); +} + +static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, + struct mlx4_ib_qp **cur_qp, + struct ib_wc *wc) +{ + struct mlx4_cqe *cqe; + struct mlx4_qp *mqp; + struct mlx4_ib_wq *wq; + struct mlx4_ib_srq *srq; + int is_send; + int is_error; + u32 g_mlpath_rqpn; + u16 wqe_ctr; + +repoll: + cqe = next_cqe_sw(cq); + if (!cqe) + return -EAGAIN; + + ++cq->mcq.cons_index; + + /* + * Make sure we read CQ entry contents after we've checked the + * ownership bit. + */ + rmb(); + + is_send = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK; + is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == + MLX4_CQE_OPCODE_ERROR; + + if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP && + is_send)) { + printk(KERN_WARNING "Completion for NOP opcode detected!\n"); + return -EINVAL; + } + + /* Resize CQ in progress */ + if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) { + if (cq->resize_buf) { + struct mlx4_ib_dev *dev = to_mdev(cq->ibcq.device); + + mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); + cq->buf = cq->resize_buf->buf; + cq->ibcq.cqe = cq->resize_buf->cqe; + + kfree(cq->resize_buf); + cq->resize_buf = NULL; + } + + goto repoll; + } + + if (!*cur_qp || + (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) { + /* + * We do not have to take the QP table lock here, + * because CQs will be locked while QPs are removed + * from the table. + */ + mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, + be32_to_cpu(cqe->vlan_my_qpn)); + if (unlikely(!mqp)) { + printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n", + cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK); + return -EINVAL; + } + + *cur_qp = to_mibqp(mqp); + } + + wc->qp = &(*cur_qp)->ibqp; + + if (is_send) { + wq = &(*cur_qp)->sq; + if (!(*cur_qp)->sq_signal_bits) { + wqe_ctr = be16_to_cpu(cqe->wqe_index); + wq->tail += (u16) (wqe_ctr - (u16) wq->tail); + } + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } else if ((*cur_qp)->ibqp.srq) { + srq = to_msrq((*cur_qp)->ibqp.srq); + wqe_ctr = be16_to_cpu(cqe->wqe_index); + wc->wr_id = srq->wrid[wqe_ctr]; + mlx4_ib_free_srq_wqe(srq, wqe_ctr); + } else { + wq = &(*cur_qp)->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } + + if (unlikely(is_error)) { + mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc); + return 0; + } + + wc->status = IB_WC_SUCCESS; + + if (is_send) { + wc->wc_flags = 0; + switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { + case MLX4_OPCODE_RDMA_WRITE_IMM: + wc->wc_flags |= IB_WC_WITH_IMM; + case MLX4_OPCODE_RDMA_WRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case MLX4_OPCODE_SEND_IMM: + wc->wc_flags |= IB_WC_WITH_IMM; + case MLX4_OPCODE_SEND: + case MLX4_OPCODE_SEND_INVAL: + wc->opcode = IB_WC_SEND; + break; + case MLX4_OPCODE_RDMA_READ: + wc->opcode = IB_WC_RDMA_READ; + wc->byte_len = be32_to_cpu(cqe->byte_cnt); + break; + case MLX4_OPCODE_ATOMIC_CS: + wc->opcode = IB_WC_COMP_SWAP; + wc->byte_len = 8; + break; + case MLX4_OPCODE_ATOMIC_FA: + wc->opcode = IB_WC_FETCH_ADD; + wc->byte_len = 8; + break; + case MLX4_OPCODE_MASKED_ATOMIC_CS: + wc->opcode = IB_WC_MASKED_COMP_SWAP; + wc->byte_len = 8; + break; + case MLX4_OPCODE_MASKED_ATOMIC_FA: + wc->opcode = IB_WC_MASKED_FETCH_ADD; + wc->byte_len = 8; + break; + case MLX4_OPCODE_BIND_MW: + wc->opcode = IB_WC_BIND_MW; + break; + case MLX4_OPCODE_LSO: + wc->opcode = IB_WC_LSO; + break; + case MLX4_OPCODE_FMR: + wc->opcode = IB_WC_FAST_REG_MR; + break; + case MLX4_OPCODE_LOCAL_INVAL: + wc->opcode = IB_WC_LOCAL_INV; + break; + } + } else { + wc->byte_len = be32_to_cpu(cqe->byte_cnt); + + switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { + case MLX4_RECV_OPCODE_RDMA_WRITE_IMM: + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->immed_rss_invalid; + break; + case MLX4_RECV_OPCODE_SEND_INVAL: + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_INVALIDATE; + wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid); + break; + case MLX4_RECV_OPCODE_SEND: + wc->opcode = IB_WC_RECV; + wc->wc_flags = 0; + break; + case MLX4_RECV_OPCODE_SEND_IMM: + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->immed_rss_invalid; + break; + } + + wc->slid = be16_to_cpu(cqe->rlid); + g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); + wc->src_qp = g_mlpath_rqpn & 0xffffff; + wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; + wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0; + wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; + wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status, + cqe->checksum) ? IB_WC_IP_CSUM_OK : 0; + if (rdma_port_get_link_layer(wc->qp->device, + (*cur_qp)->port) == IB_LINK_LAYER_ETHERNET) + wc->sl = be16_to_cpu(cqe->sl_vid) >> 13; + else + wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; + } + + return 0; +} + +int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct mlx4_ib_cq *cq = to_mcq(ibcq); + struct mlx4_ib_qp *cur_qp = NULL; + unsigned long flags; + int npolled; + int err = 0; + + spin_lock_irqsave(&cq->lock, flags); + + for (npolled = 0; npolled < num_entries; ++npolled) { + err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled); + if (err) + break; + } + + mlx4_cq_set_ci(&cq->mcq); + + spin_unlock_irqrestore(&cq->lock, flags); + + if (err == 0 || err == -EAGAIN) + return npolled; + else + return err; +} + +int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + mlx4_cq_arm(&to_mcq(ibcq)->mcq, + (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? + MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT, + to_mdev(ibcq->device)->uar_map, + MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock)); + + return 0; +} + +void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) +{ + u32 prod_index; + int nfreed = 0; + struct mlx4_cqe *cqe, *dest; + u8 owner_bit; + + /* + * First we need to find the current producer index, so we + * know where to start cleaning from. It doesn't matter if HW + * adds new entries after this loop -- the QP we're worried + * about is already in RESET, so the new entries won't come + * from our QP and therefore don't need to be checked. + */ + for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); ++prod_index) + if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe) + break; + + /* + * Now sweep backwards through the CQ, removing CQ entries + * that match our QP by copying older entries on top of them. + */ + while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { + cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); + if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) { + if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) + mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index)); + ++nfreed; + } else if (nfreed) { + dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe); + owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK; + memcpy(dest, cqe, sizeof *cqe); + dest->owner_sr_opcode = owner_bit | + (dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK); + } + } + + if (nfreed) { + cq->mcq.cons_index += nfreed; + /* + * Make sure update of buffer contents is done before + * updating consumer index. + */ + wmb(); + mlx4_cq_set_ci(&cq->mcq); + } +} + +void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) +{ + spin_lock_irq(&cq->lock); + __mlx4_ib_cq_clean(cq, qpn, srq); + spin_unlock_irq(&cq->lock); +} diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c new file mode 100644 index 00000000..8aee4233 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "mlx4_ib.h" + +struct mlx4_ib_user_db_page { + struct list_head list; + struct ib_umem *umem; + unsigned long user_virt; + int refcnt; +}; + +int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, + struct mlx4_db *db) +{ + struct mlx4_ib_user_db_page *page; + struct ib_umem_chunk *chunk; + int err = 0; + + mutex_lock(&context->db_page_mutex); + + list_for_each_entry(page, &context->db_page_list, list) + if (page->user_virt == (virt & PAGE_MASK)) + goto found; + + page = kmalloc(sizeof *page, GFP_KERNEL); + if (!page) { + err = -ENOMEM; + goto out; + } + + page->user_virt = (virt & PAGE_MASK); + page->refcnt = 0; + page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK, + PAGE_SIZE, 0, 0); + if (IS_ERR(page->umem)) { + err = PTR_ERR(page->umem); + kfree(page); + goto out; + } + + list_add(&page->list, &context->db_page_list); + +found: + chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list); + db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK); + db->u.user_page = page; + ++page->refcnt; + +out: + mutex_unlock(&context->db_page_mutex); + + return err; +} + +void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db) +{ + mutex_lock(&context->db_page_mutex); + + if (!--db->u.user_page->refcnt) { + list_del(&db->u.user_page->list); + ib_umem_release(db->u.user_page->umem); + kfree(db->u.user_page); + } + + mutex_unlock(&context->db_page_mutex); +} diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c new file mode 100644 index 00000000..259b0670 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -0,0 +1,429 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include +#include +#include + +#include "mlx4_ib.h" + +enum { + MLX4_IB_VENDOR_CLASS1 = 0x9, + MLX4_IB_VENDOR_CLASS2 = 0xa +}; + +int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, + void *in_mad, void *response_mad) +{ + struct mlx4_cmd_mailbox *inmailbox, *outmailbox; + void *inbox; + int err; + u32 in_modifier = port; + u8 op_modifier = 0; + + inmailbox = mlx4_alloc_cmd_mailbox(dev->dev); + if (IS_ERR(inmailbox)) + return PTR_ERR(inmailbox); + inbox = inmailbox->buf; + + outmailbox = mlx4_alloc_cmd_mailbox(dev->dev); + if (IS_ERR(outmailbox)) { + mlx4_free_cmd_mailbox(dev->dev, inmailbox); + return PTR_ERR(outmailbox); + } + + memcpy(inbox, in_mad, 256); + + /* + * Key check traps can't be generated unless we have in_wc to + * tell us where to send the trap. + */ + if (ignore_mkey || !in_wc) + op_modifier |= 0x1; + if (ignore_bkey || !in_wc) + op_modifier |= 0x2; + + if (in_wc) { + struct { + __be32 my_qpn; + u32 reserved1; + __be32 rqpn; + u8 sl; + u8 g_path; + u16 reserved2[2]; + __be16 pkey; + u32 reserved3[11]; + u8 grh[40]; + } *ext_info; + + memset(inbox + 256, 0, 256); + ext_info = inbox + 256; + + ext_info->my_qpn = cpu_to_be32(in_wc->qp->qp_num); + ext_info->rqpn = cpu_to_be32(in_wc->src_qp); + ext_info->sl = in_wc->sl << 4; + ext_info->g_path = in_wc->dlid_path_bits | + (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0); + ext_info->pkey = cpu_to_be16(in_wc->pkey_index); + + if (in_grh) + memcpy(ext_info->grh, in_grh, 40); + + op_modifier |= 0x4; + + in_modifier |= in_wc->slid << 16; + } + + err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, + in_modifier, op_modifier, + MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + + if (!err) + memcpy(response_mad, outmailbox->buf, 256); + + mlx4_free_cmd_mailbox(dev->dev, inmailbox); + mlx4_free_cmd_mailbox(dev->dev, outmailbox); + + return err; +} + +static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) +{ + struct ib_ah *new_ah; + struct ib_ah_attr ah_attr; + + if (!dev->send_agent[port_num - 1][0]) + return; + + memset(&ah_attr, 0, sizeof ah_attr); + ah_attr.dlid = lid; + ah_attr.sl = sl; + ah_attr.port_num = port_num; + + new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, + &ah_attr); + if (IS_ERR(new_ah)) + return; + + spin_lock(&dev->sm_lock); + if (dev->sm_ah[port_num - 1]) + ib_destroy_ah(dev->sm_ah[port_num - 1]); + dev->sm_ah[port_num - 1] = new_ah; + spin_unlock(&dev->sm_lock); +} + +/* + * Snoop SM MADs for port info and P_Key table sets, so we can + * synthesize LID change and P_Key change events. + */ +static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, + u16 prev_lid) +{ + struct ib_event event; + + if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + mad->mad_hdr.method == IB_MGMT_METHOD_SET) { + if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { + struct ib_port_info *pinfo = + (struct ib_port_info *) ((struct ib_smp *) mad)->data; + u16 lid = be16_to_cpu(pinfo->lid); + + update_sm_ah(to_mdev(ibdev), port_num, + be16_to_cpu(pinfo->sm_lid), + pinfo->neighbormtu_mastersmsl & 0xf); + + event.device = ibdev; + event.element.port_num = port_num; + + if (pinfo->clientrereg_resv_subnetto & 0x80) { + event.event = IB_EVENT_CLIENT_REREGISTER; + ib_dispatch_event(&event); + } + + if (prev_lid != lid) { + event.event = IB_EVENT_LID_CHANGE; + ib_dispatch_event(&event); + } + } + + if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) { + event.device = ibdev; + event.event = IB_EVENT_PKEY_CHANGE; + event.element.port_num = port_num; + ib_dispatch_event(&event); + } + } +} + +static void node_desc_override(struct ib_device *dev, + struct ib_mad *mad) +{ + if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && + mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { + spin_lock(&to_mdev(dev)->sm_lock); + memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); + spin_unlock(&to_mdev(dev)->sm_lock); + } +} + +static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *mad) +{ + int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; + struct ib_mad_send_buf *send_buf; + struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; + int ret; + + if (agent) { + send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, + IB_MGMT_MAD_DATA, GFP_ATOMIC); + if (IS_ERR(send_buf)) + return; + /* + * We rely here on the fact that MLX QPs don't use the + * address handle after the send is posted (this is + * wrong following the IB spec strictly, but we know + * it's OK for our devices). + */ + spin_lock(&dev->sm_lock); + memcpy(send_buf->mad, mad, sizeof *mad); + if ((send_buf->ah = dev->sm_ah[port_num - 1])) + ret = ib_post_send_mad(send_buf, NULL); + else + ret = -EINVAL; + spin_unlock(&dev->sm_lock); + + if (ret) + ib_free_send_mad(send_buf); + } +} + +static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + u16 slid, prev_lid = 0; + int err; + struct ib_port_attr pattr; + + slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { + forward_trap(to_mdev(ibdev), port_num, in_mad); + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + } + + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) + return IB_MAD_RESULT_SUCCESS; + + /* + * Don't process SMInfo queries -- the SMA can't handle them. + */ + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) + return IB_MAD_RESULT_SUCCESS; + } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || + in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 || + in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2 || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) { + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) + return IB_MAD_RESULT_SUCCESS; + } else + return IB_MAD_RESULT_SUCCESS; + + if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && + in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && + !ib_query_port(ibdev, port_num, &pattr)) + prev_lid = pattr.lid; + + err = mlx4_MAD_IFC(to_mdev(ibdev), + mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, + port_num, in_wc, in_grh, in_mad, out_mad); + if (err) + return IB_MAD_RESULT_FAILURE; + + if (!out_mad->mad_hdr.status) { + smp_snoop(ibdev, port_num, in_mad, prev_lid); + node_desc_override(ibdev, out_mad); + } + + /* set return bit in status of directed route responses */ + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) + /* no response for trap repress */ + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +static void edit_counter(struct mlx4_counter *cnt, + struct ib_pma_portcounters *pma_cnt) +{ + pma_cnt->port_xmit_data = cpu_to_be32((be64_to_cpu(cnt->tx_bytes)>>2)); + pma_cnt->port_rcv_data = cpu_to_be32((be64_to_cpu(cnt->rx_bytes)>>2)); + pma_cnt->port_xmit_packets = cpu_to_be32(be64_to_cpu(cnt->tx_frames)); + pma_cnt->port_rcv_packets = cpu_to_be32(be64_to_cpu(cnt->rx_frames)); +} + +static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_ib_dev *dev = to_mdev(ibdev); + int err; + u32 inmod = dev->counters[port_num - 1] & 0xffff; + u8 mode; + + if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) + return -EINVAL; + + mailbox = mlx4_alloc_cmd_mailbox(dev->dev); + if (IS_ERR(mailbox)) + return IB_MAD_RESULT_FAILURE; + + err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0, + MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_WRAPPED); + if (err) + err = IB_MAD_RESULT_FAILURE; + else { + memset(out_mad->data, 0, sizeof out_mad->data); + mode = ((struct mlx4_counter *)mailbox->buf)->counter_mode; + switch (mode & 0xf) { + case 0: + edit_counter(mailbox->buf, + (void *)(out_mad->data + 40)); + err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; + break; + default: + err = IB_MAD_RESULT_FAILURE; + } + } + + mlx4_free_cmd_mailbox(dev->dev, mailbox); + + return err; +} + +int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + switch (rdma_port_get_link_layer(ibdev, port_num)) { + case IB_LINK_LAYER_INFINIBAND: + return ib_process_mad(ibdev, mad_flags, port_num, in_wc, + in_grh, in_mad, out_mad); + case IB_LINK_LAYER_ETHERNET: + return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, + in_grh, in_mad, out_mad); + default: + return -EINVAL; + } +} + +static void send_handler(struct ib_mad_agent *agent, + struct ib_mad_send_wc *mad_send_wc) +{ + ib_free_send_mad(mad_send_wc->send_buf); +} + +int mlx4_ib_mad_init(struct mlx4_ib_dev *dev) +{ + struct ib_mad_agent *agent; + int p, q; + int ret; + enum rdma_link_layer ll; + + for (p = 0; p < dev->num_ports; ++p) { + ll = rdma_port_get_link_layer(&dev->ib_dev, p + 1); + for (q = 0; q <= 1; ++q) { + if (ll == IB_LINK_LAYER_INFINIBAND) { + agent = ib_register_mad_agent(&dev->ib_dev, p + 1, + q ? IB_QPT_GSI : IB_QPT_SMI, + NULL, 0, send_handler, + NULL, NULL); + if (IS_ERR(agent)) { + ret = PTR_ERR(agent); + goto err; + } + dev->send_agent[p][q] = agent; + } else + dev->send_agent[p][q] = NULL; + } + } + + return 0; + +err: + for (p = 0; p < dev->num_ports; ++p) + for (q = 0; q <= 1; ++q) + if (dev->send_agent[p][q]) + ib_unregister_mad_agent(dev->send_agent[p][q]); + + return ret; +} + +void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) +{ + struct ib_mad_agent *agent; + int p, q; + + for (p = 0; p < dev->num_ports; ++p) { + for (q = 0; q <= 1; ++q) { + agent = dev->send_agent[p][q]; + if (agent) { + dev->send_agent[p][q] = NULL; + ib_unregister_mad_agent(agent); + } + } + + if (dev->sm_ah[p]) + ib_destroy_ah(dev->sm_ah[p]); + } +} diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c new file mode 100644 index 00000000..b948b6dd --- /dev/null +++ b/drivers/infiniband/hw/mlx4/main.c @@ -0,0 +1,1370 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "mlx4_ib.h" +#include "user.h" + +#define DRV_NAME "mlx4_ib" +#define DRV_VERSION "1.0" +#define DRV_RELDATE "April 4, 2008" + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); + +static const char mlx4_ib_version[] = + DRV_NAME ": Mellanox ConnectX InfiniBand driver v" + DRV_VERSION " (" DRV_RELDATE ")\n"; + +struct update_gid_work { + struct work_struct work; + union ib_gid gids[128]; + struct mlx4_ib_dev *dev; + int port; +}; + +static struct workqueue_struct *wq; + +static void init_query_mad(struct ib_smp *mad) +{ + mad->base_version = 1; + mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->class_version = 1; + mad->method = IB_MGMT_METHOD_GET; +} + +static union ib_gid zgid; + +static int mlx4_ib_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memset(props, 0, sizeof *props); + + props->fw_ver = dev->dev->caps.fw_ver; + props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | + IB_DEVICE_PORT_ACTIVE_EVENT | + IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR) + props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) + props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM) + props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT) + props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) + props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; + if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH) + props->device_cap_flags |= IB_DEVICE_UD_TSO; + if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) + props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; + if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) && + (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && + (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) + props->device_cap_flags |= IB_DEVICE_XRC; + + props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & + 0xffffff; + props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); + props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); + memcpy(&props->sys_image_guid, out_mad->data + 4, 8); + + props->max_mr_size = ~0ull; + props->page_size_cap = dev->dev->caps.page_size_cap; + props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps; + props->max_qp_wr = dev->dev->caps.max_wqes; + props->max_sge = min(dev->dev->caps.max_sq_sg, + dev->dev->caps.max_rq_sg); + props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs; + props->max_cqe = dev->dev->caps.max_cqes; + props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws; + props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds; + props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma; + props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma; + props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; + props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; + props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; + props->max_srq_sge = dev->dev->caps.max_srq_sge; + props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES; + props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; + props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? + IB_ATOMIC_HCA : IB_ATOMIC_NONE; + props->masked_atomic_cap = IB_ATOMIC_HCA; + props->max_pkeys = dev->dev->caps.pkey_table_len[1]; + props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; + props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_mcast_grp; + props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; + +out: + kfree(in_mad); + kfree(out_mad); + + return err; +} + +static enum rdma_link_layer +mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num) +{ + struct mlx4_dev *dev = to_mdev(device)->dev; + + return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ? + IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; +} + +static int ib_link_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int ext_active_speed; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, + in_mad, out_mad); + if (err) + goto out; + + + props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); + props->lmc = out_mad->data[34] & 0x7; + props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); + props->sm_sl = out_mad->data[36] & 0xf; + props->state = out_mad->data[32] & 0xf; + props->phys_state = out_mad->data[33] >> 4; + props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); + props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; + props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; + props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port]; + props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); + props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); + props->active_width = out_mad->data[31] & 0xf; + props->active_speed = out_mad->data[35] >> 4; + props->max_mtu = out_mad->data[41] & 0xf; + props->active_mtu = out_mad->data[36] >> 4; + props->subnet_timeout = out_mad->data[51] & 0x1f; + props->max_vl_num = out_mad->data[37] >> 4; + props->init_type_reply = out_mad->data[41] >> 4; + + /* Check if extended speeds (EDR/FDR/...) are supported */ + if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { + ext_active_speed = out_mad->data[62] >> 4; + + switch (ext_active_speed) { + case 1: + props->active_speed = IB_SPEED_FDR; + break; + case 2: + props->active_speed = IB_SPEED_EDR; + break; + } + } + + /* If reported active speed is QDR, check if is FDR-10 */ + if (props->active_speed == IB_SPEED_QDR) { + init_query_mad(in_mad); + in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, + NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + /* Checking LinkSpeedActive for FDR-10 */ + if (out_mad->data[15] & 0x1) + props->active_speed = IB_SPEED_FDR10; + } + + /* Avoid wrong speed value returned by FW if the IB link is down. */ + if (props->state == IB_PORT_DOWN) + props->active_speed = IB_SPEED_SDR; + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static u8 state_to_phys_state(enum ib_port_state state) +{ + return state == IB_PORT_ACTIVE ? 5 : 3; +} + +static int eth_link_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + + struct mlx4_ib_dev *mdev = to_mdev(ibdev); + struct mlx4_ib_iboe *iboe = &mdev->iboe; + struct net_device *ndev; + enum ib_mtu tmp; + struct mlx4_cmd_mailbox *mailbox; + int err = 0; + + mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0, + MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + goto out; + + props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ? + IB_WIDTH_4X : IB_WIDTH_1X; + props->active_speed = IB_SPEED_QDR; + props->port_cap_flags = IB_PORT_CM_SUP; + props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; + props->max_msg_sz = mdev->dev->caps.max_msg_sz; + props->pkey_tbl_len = 1; + props->max_mtu = IB_MTU_4096; + props->max_vl_num = 2; + props->state = IB_PORT_DOWN; + props->phys_state = state_to_phys_state(props->state); + props->active_mtu = IB_MTU_256; + spin_lock(&iboe->lock); + ndev = iboe->netdevs[port - 1]; + if (!ndev) + goto out_unlock; + + tmp = iboe_get_mtu(ndev->mtu); + props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256; + + props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; + props->phys_state = state_to_phys_state(props->state); +out_unlock: + spin_unlock(&iboe->lock); +out: + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return err; +} + +static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + int err; + + memset(props, 0, sizeof *props); + + err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? + ib_link_query_port(ibdev, port, props) : + eth_link_query_port(ibdev, port, props); + + return err; +} + +static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(gid->raw, out_mad->data + 8, 8); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; + in_mad->attr_mod = cpu_to_be32(index / 8); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + + *gid = dev->iboe.gid_table[port - 1][index]; + + return 0; +} + +static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) + return __mlx4_ib_query_gid(ibdev, port, index, gid); + else + return iboe_query_gid(ibdev, port, index, gid); +} + +static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; + in_mad->attr_mod = cpu_to_be32(index / 32); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, + struct ib_device_modify *props) +{ + struct mlx4_cmd_mailbox *mailbox; + + if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) + return -EOPNOTSUPP; + + if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) + return 0; + + spin_lock(&to_mdev(ibdev)->sm_lock); + memcpy(ibdev->node_desc, props->node_desc, 64); + spin_unlock(&to_mdev(ibdev)->sm_lock); + + /* + * If possible, pass node desc to FW, so it can generate + * a 144 trap. If cmd fails, just ignore. + */ + mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev); + if (IS_ERR(mailbox)) + return 0; + + memset(mailbox->buf, 0, 256); + memcpy(mailbox->buf, props->node_desc, 64); + mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, + MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + + mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); + + return 0; +} + +static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, + u32 cap_mask) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; + + mailbox = mlx4_alloc_cmd_mailbox(dev->dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memset(mailbox->buf, 0, 256); + + if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { + *(u8 *) mailbox->buf = !!reset_qkey_viols << 6; + ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask); + } else { + ((u8 *) mailbox->buf)[3] = !!reset_qkey_viols; + ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask); + } + + err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev->dev, mailbox); + return err; +} + +static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, + struct ib_port_modify *props) +{ + struct ib_port_attr attr; + u32 cap_mask; + int err; + + mutex_lock(&to_mdev(ibdev)->cap_mask_mutex); + + err = mlx4_ib_query_port(ibdev, port, &attr); + if (err) + goto out; + + cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) & + ~props->clr_port_cap_mask; + + err = mlx4_SET_PORT(to_mdev(ibdev), port, + !!(mask & IB_PORT_RESET_QKEY_CNTR), + cap_mask); + +out: + mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); + return err; +} + +static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct mlx4_ib_ucontext *context; + struct mlx4_ib_alloc_ucontext_resp resp; + int err; + + if (!dev->ib_active) + return ERR_PTR(-EAGAIN); + + resp.qp_tab_size = dev->dev->caps.num_qps; + resp.bf_reg_size = dev->dev->caps.bf_reg_size; + resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; + + context = kmalloc(sizeof *context, GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar); + if (err) { + kfree(context); + return ERR_PTR(err); + } + + INIT_LIST_HEAD(&context->db_page_list); + mutex_init(&context->db_page_mutex); + + err = ib_copy_to_udata(udata, &resp, sizeof resp); + if (err) { + mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar); + kfree(context); + return ERR_PTR(-EFAULT); + } + + return &context->ibucontext; +} + +static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) +{ + struct mlx4_ib_ucontext *context = to_mucontext(ibcontext); + + mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar); + kfree(context); + + return 0; +} + +static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct mlx4_ib_dev *dev = to_mdev(context->device); + + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + if (vma->vm_pgoff == 0) { + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (io_remap_pfn_range(vma, vma->vm_start, + to_mucontext(context)->uar.pfn, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) { + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + + if (io_remap_pfn_range(vma, vma->vm_start, + to_mucontext(context)->uar.pfn + + dev->dev->caps.num_uars, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + } else + return -EINVAL; + + return 0; +} + +static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx4_ib_pd *pd; + int err; + + pd = kmalloc(sizeof *pd, GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn); + if (err) { + kfree(pd); + return ERR_PTR(err); + } + + if (context) + if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) { + mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn); + kfree(pd); + return ERR_PTR(-EFAULT); + } + + return &pd->ibpd; +} + +static int mlx4_ib_dealloc_pd(struct ib_pd *pd) +{ + mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); + kfree(pd); + + return 0; +} + +static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx4_ib_xrcd *xrcd; + int err; + + if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return ERR_PTR(-ENOSYS); + + xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL); + if (!xrcd) + return ERR_PTR(-ENOMEM); + + err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn); + if (err) + goto err1; + + xrcd->pd = ib_alloc_pd(ibdev); + if (IS_ERR(xrcd->pd)) { + err = PTR_ERR(xrcd->pd); + goto err2; + } + + xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0); + if (IS_ERR(xrcd->cq)) { + err = PTR_ERR(xrcd->cq); + goto err3; + } + + return &xrcd->ibxrcd; + +err3: + ib_dealloc_pd(xrcd->pd); +err2: + mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn); +err1: + kfree(xrcd); + return ERR_PTR(err); +} + +static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd) +{ + ib_destroy_cq(to_mxrcd(xrcd)->cq); + ib_dealloc_pd(to_mxrcd(xrcd)->pd); + mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); + kfree(xrcd); + + return 0; +} + +static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) +{ + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_gid_entry *ge; + + ge = kzalloc(sizeof *ge, GFP_KERNEL); + if (!ge) + return -ENOMEM; + + ge->gid = *gid; + if (mlx4_ib_add_mc(mdev, mqp, gid)) { + ge->port = mqp->port; + ge->added = 1; + } + + mutex_lock(&mqp->mutex); + list_add_tail(&ge->list, &mqp->gid_list); + mutex_unlock(&mqp->mutex); + + return 0; +} + +int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, + union ib_gid *gid) +{ + u8 mac[6]; + struct net_device *ndev; + int ret = 0; + + if (!mqp->port) + return 0; + + spin_lock(&mdev->iboe.lock); + ndev = mdev->iboe.netdevs[mqp->port - 1]; + if (ndev) + dev_hold(ndev); + spin_unlock(&mdev->iboe.lock); + + if (ndev) { + rdma_get_mcast_mac((struct in6_addr *)gid, mac); + rtnl_lock(); + dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac); + ret = 1; + rtnl_unlock(); + dev_put(ndev); + } + + return ret; +} + +static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + int err; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + + err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, + !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), + MLX4_PROT_IB_IPV6); + if (err) + return err; + + err = add_gid_entry(ibqp, gid); + if (err) + goto err_add; + + return 0; + +err_add: + mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6); + return err; +} + +static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw) +{ + struct mlx4_ib_gid_entry *ge; + struct mlx4_ib_gid_entry *tmp; + struct mlx4_ib_gid_entry *ret = NULL; + + list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { + if (!memcmp(raw, ge->gid.raw, 16)) { + ret = ge; + break; + } + } + + return ret; +} + +static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + int err; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + u8 mac[6]; + struct net_device *ndev; + struct mlx4_ib_gid_entry *ge; + + err = mlx4_multicast_detach(mdev->dev, + &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6); + if (err) + return err; + + mutex_lock(&mqp->mutex); + ge = find_gid_entry(mqp, gid->raw); + if (ge) { + spin_lock(&mdev->iboe.lock); + ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL; + if (ndev) + dev_hold(ndev); + spin_unlock(&mdev->iboe.lock); + rdma_get_mcast_mac((struct in6_addr *)gid, mac); + if (ndev) { + rtnl_lock(); + dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac); + rtnl_unlock(); + dev_put(ndev); + } + list_del(&ge->list); + kfree(ge); + } else + printk(KERN_WARNING "could not find mgid entry\n"); + + mutex_unlock(&mqp->mutex); + + return 0; +} + +static int init_node_data(struct mlx4_ib_dev *dev) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; + + err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(dev->ib_dev.node_desc, out_mad->data, 64); + + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static ssize_t show_hca(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx4_ib_dev *dev = + container_of(device, struct mlx4_ib_dev, ib_dev.dev); + return sprintf(buf, "MT%d\n", dev->dev->pdev->device); +} + +static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx4_ib_dev *dev = + container_of(device, struct mlx4_ib_dev, ib_dev.dev); + return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32), + (int) (dev->dev->caps.fw_ver >> 16) & 0xffff, + (int) dev->dev->caps.fw_ver & 0xffff); +} + +static ssize_t show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx4_ib_dev *dev = + container_of(device, struct mlx4_ib_dev, ib_dev.dev); + return sprintf(buf, "%x\n", dev->dev->rev_id); +} + +static ssize_t show_board(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx4_ib_dev *dev = + container_of(device, struct mlx4_ib_dev, ib_dev.dev); + return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, + dev->dev->board_id); +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); + +static struct device_attribute *mlx4_class_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_fw_ver, + &dev_attr_hca_type, + &dev_attr_board_id +}; + +static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev) +{ + memcpy(eui, dev->dev_addr, 3); + memcpy(eui + 5, dev->dev_addr + 3, 3); + if (vlan_id < 0x1000) { + eui[3] = vlan_id >> 8; + eui[4] = vlan_id & 0xff; + } else { + eui[3] = 0xff; + eui[4] = 0xfe; + } + eui[0] ^= 2; +} + +static void update_gids_task(struct work_struct *work) +{ + struct update_gid_work *gw = container_of(work, struct update_gid_work, work); + struct mlx4_cmd_mailbox *mailbox; + union ib_gid *gids; + int err; + struct mlx4_dev *dev = gw->dev->dev; + struct ib_event event; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + printk(KERN_WARNING "update gid table failed %ld\n", PTR_ERR(mailbox)); + return; + } + + gids = mailbox->buf; + memcpy(gids, gw->gids, sizeof gw->gids); + + err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + if (err) + printk(KERN_WARNING "set port command failed\n"); + else { + memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids); + event.device = &gw->dev->ib_dev; + event.element.port_num = gw->port; + event.event = IB_EVENT_GID_CHANGE; + ib_dispatch_event(&event); + } + + mlx4_free_cmd_mailbox(dev, mailbox); + kfree(gw); +} + +static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear) +{ + struct net_device *ndev = dev->iboe.netdevs[port - 1]; + struct update_gid_work *work; + struct net_device *tmp; + int i; + u8 *hits; + int ret; + union ib_gid gid; + int free; + int found; + int need_update = 0; + u16 vid; + + work = kzalloc(sizeof *work, GFP_ATOMIC); + if (!work) + return -ENOMEM; + + hits = kzalloc(128, GFP_ATOMIC); + if (!hits) { + ret = -ENOMEM; + goto out; + } + + rcu_read_lock(); + for_each_netdev_rcu(&init_net, tmp) { + if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) { + gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + vid = rdma_vlan_dev_vlan_id(tmp); + mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev); + found = 0; + free = -1; + for (i = 0; i < 128; ++i) { + if (free < 0 && + !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) + free = i; + if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) { + hits[i] = 1; + found = 1; + break; + } + } + + if (!found) { + if (tmp == ndev && + (memcmp(&dev->iboe.gid_table[port - 1][0], + &gid, sizeof gid) || + !memcmp(&dev->iboe.gid_table[port - 1][0], + &zgid, sizeof gid))) { + dev->iboe.gid_table[port - 1][0] = gid; + ++need_update; + hits[0] = 1; + } else if (free >= 0) { + dev->iboe.gid_table[port - 1][free] = gid; + hits[free] = 1; + ++need_update; + } + } + } + } + rcu_read_unlock(); + + for (i = 0; i < 128; ++i) + if (!hits[i]) { + if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) + ++need_update; + dev->iboe.gid_table[port - 1][i] = zgid; + } + + if (need_update) { + memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids); + INIT_WORK(&work->work, update_gids_task); + work->port = port; + work->dev = dev; + queue_work(wq, &work->work); + } else + kfree(work); + + kfree(hits); + return 0; + +out: + kfree(work); + return ret; +} + +static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event) +{ + switch (event) { + case NETDEV_UP: + case NETDEV_CHANGEADDR: + update_ipv6_gids(dev, port, 0); + break; + + case NETDEV_DOWN: + update_ipv6_gids(dev, port, 1); + dev->iboe.netdevs[port - 1] = NULL; + } +} + +static void netdev_added(struct mlx4_ib_dev *dev, int port) +{ + update_ipv6_gids(dev, port, 0); +} + +static void netdev_removed(struct mlx4_ib_dev *dev, int port) +{ + update_ipv6_gids(dev, port, 1); +} + +static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct mlx4_ib_dev *ibdev; + struct net_device *oldnd; + struct mlx4_ib_iboe *iboe; + int port; + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + + ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); + iboe = &ibdev->iboe; + + spin_lock(&iboe->lock); + mlx4_foreach_ib_transport_port(port, ibdev->dev) { + oldnd = iboe->netdevs[port - 1]; + iboe->netdevs[port - 1] = + mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); + if (oldnd != iboe->netdevs[port - 1]) { + if (iboe->netdevs[port - 1]) + netdev_added(ibdev, port); + else + netdev_removed(ibdev, port); + } + } + + if (dev == iboe->netdevs[0] || + (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0])) + handle_en_event(ibdev, 1, event); + else if (dev == iboe->netdevs[1] + || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1])) + handle_en_event(ibdev, 2, event); + + spin_unlock(&iboe->lock); + + return NOTIFY_DONE; +} + +static void *mlx4_ib_add(struct mlx4_dev *dev) +{ + struct mlx4_ib_dev *ibdev; + int num_ports = 0; + int i; + int err; + struct mlx4_ib_iboe *iboe; + + printk_once(KERN_INFO "%s", mlx4_ib_version); + + if (mlx4_is_mfunc(dev)) { + printk(KERN_WARNING "IB not yet supported in SRIOV\n"); + return NULL; + } + + mlx4_foreach_ib_transport_port(i, dev) + num_ports++; + + /* No point in registering a device with no ports... */ + if (num_ports == 0) + return NULL; + + ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); + if (!ibdev) { + dev_err(&dev->pdev->dev, "Device struct alloc failed\n"); + return NULL; + } + + iboe = &ibdev->iboe; + + if (mlx4_pd_alloc(dev, &ibdev->priv_pdn)) + goto err_dealloc; + + if (mlx4_uar_alloc(dev, &ibdev->priv_uar)) + goto err_pd; + + ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT, + PAGE_SIZE); + if (!ibdev->uar_map) + goto err_uar; + MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock); + + ibdev->dev = dev; + + strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); + ibdev->ib_dev.owner = THIS_MODULE; + ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; + ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; + ibdev->num_ports = num_ports; + ibdev->ib_dev.phys_port_cnt = ibdev->num_ports; + ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; + ibdev->ib_dev.dma_device = &dev->pdev->dev; + + ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; + ibdev->ib_dev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | + (1ull << IB_USER_VERBS_CMD_OPEN_QP); + + ibdev->ib_dev.query_device = mlx4_ib_query_device; + ibdev->ib_dev.query_port = mlx4_ib_query_port; + ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer; + ibdev->ib_dev.query_gid = mlx4_ib_query_gid; + ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; + ibdev->ib_dev.modify_device = mlx4_ib_modify_device; + ibdev->ib_dev.modify_port = mlx4_ib_modify_port; + ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext; + ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext; + ibdev->ib_dev.mmap = mlx4_ib_mmap; + ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd; + ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd; + ibdev->ib_dev.create_ah = mlx4_ib_create_ah; + ibdev->ib_dev.query_ah = mlx4_ib_query_ah; + ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah; + ibdev->ib_dev.create_srq = mlx4_ib_create_srq; + ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq; + ibdev->ib_dev.query_srq = mlx4_ib_query_srq; + ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq; + ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; + ibdev->ib_dev.create_qp = mlx4_ib_create_qp; + ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; + ibdev->ib_dev.query_qp = mlx4_ib_query_qp; + ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; + ibdev->ib_dev.post_send = mlx4_ib_post_send; + ibdev->ib_dev.post_recv = mlx4_ib_post_recv; + ibdev->ib_dev.create_cq = mlx4_ib_create_cq; + ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq; + ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq; + ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq; + ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq; + ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; + ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; + ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; + ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; + ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr; + ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; + ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list; + ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; + ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; + ibdev->ib_dev.process_mad = mlx4_ib_process_mad; + + ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; + ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; + ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; + ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { + ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; + ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; + ibdev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | + (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + } + + spin_lock_init(&iboe->lock); + + if (init_node_data(ibdev)) + goto err_map; + + for (i = 0; i < ibdev->num_ports; ++i) { + if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == + IB_LINK_LAYER_ETHERNET) { + err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]); + if (err) + ibdev->counters[i] = -1; + } else + ibdev->counters[i] = -1; + } + + spin_lock_init(&ibdev->sm_lock); + mutex_init(&ibdev->cap_mask_mutex); + + if (ib_register_device(&ibdev->ib_dev, NULL)) + goto err_counter; + + if (mlx4_ib_mad_init(ibdev)) + goto err_reg; + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) { + iboe->nb.notifier_call = mlx4_ib_netdev_event; + err = register_netdevice_notifier(&iboe->nb); + if (err) + goto err_reg; + } + + for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) { + if (device_create_file(&ibdev->ib_dev.dev, + mlx4_class_attributes[i])) + goto err_notif; + } + + ibdev->ib_active = true; + + return ibdev; + +err_notif: + if (unregister_netdevice_notifier(&ibdev->iboe.nb)) + printk(KERN_WARNING "failure unregistering notifier\n"); + flush_workqueue(wq); + +err_reg: + ib_unregister_device(&ibdev->ib_dev); + +err_counter: + for (; i; --i) + if (ibdev->counters[i - 1] != -1) + mlx4_counter_free(ibdev->dev, ibdev->counters[i - 1]); + +err_map: + iounmap(ibdev->uar_map); + +err_uar: + mlx4_uar_free(dev, &ibdev->priv_uar); + +err_pd: + mlx4_pd_free(dev, ibdev->priv_pdn); + +err_dealloc: + ib_dealloc_device(&ibdev->ib_dev); + + return NULL; +} + +static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) +{ + struct mlx4_ib_dev *ibdev = ibdev_ptr; + int p; + + mlx4_ib_mad_cleanup(ibdev); + ib_unregister_device(&ibdev->ib_dev); + if (ibdev->iboe.nb.notifier_call) { + if (unregister_netdevice_notifier(&ibdev->iboe.nb)) + printk(KERN_WARNING "failure unregistering notifier\n"); + ibdev->iboe.nb.notifier_call = NULL; + } + iounmap(ibdev->uar_map); + for (p = 0; p < ibdev->num_ports; ++p) + if (ibdev->counters[p] != -1) + mlx4_counter_free(ibdev->dev, ibdev->counters[p]); + mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) + mlx4_CLOSE_PORT(dev, p); + + mlx4_uar_free(dev, &ibdev->priv_uar); + mlx4_pd_free(dev, ibdev->priv_pdn); + ib_dealloc_device(&ibdev->ib_dev); +} + +static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, + enum mlx4_dev_event event, int port) +{ + struct ib_event ibev; + struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); + + if (port > ibdev->num_ports) + return; + + switch (event) { + case MLX4_DEV_EVENT_PORT_UP: + ibev.event = IB_EVENT_PORT_ACTIVE; + break; + + case MLX4_DEV_EVENT_PORT_DOWN: + ibev.event = IB_EVENT_PORT_ERR; + break; + + case MLX4_DEV_EVENT_CATASTROPHIC_ERROR: + ibdev->ib_active = false; + ibev.event = IB_EVENT_DEVICE_FATAL; + break; + + default: + return; + } + + ibev.device = ibdev_ptr; + ibev.element.port_num = port; + + ib_dispatch_event(&ibev); +} + +static struct mlx4_interface mlx4_ib_interface = { + .add = mlx4_ib_add, + .remove = mlx4_ib_remove, + .event = mlx4_ib_event, + .protocol = MLX4_PROT_IB_IPV6 +}; + +static int __init mlx4_ib_init(void) +{ + int err; + + wq = create_singlethread_workqueue("mlx4_ib"); + if (!wq) + return -ENOMEM; + + err = mlx4_register_interface(&mlx4_ib_interface); + if (err) { + destroy_workqueue(wq); + return err; + } + + return 0; +} + +static void __exit mlx4_ib_cleanup(void) +{ + mlx4_unregister_interface(&mlx4_ib_interface); + destroy_workqueue(wq); +} + +module_init(mlx4_ib_init); +module_exit(mlx4_ib_cleanup); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h new file mode 100644 index 00000000..ed80345c --- /dev/null +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_IB_H +#define MLX4_IB_H + +#include +#include +#include + +#include +#include + +#include +#include + +struct mlx4_ib_ucontext { + struct ib_ucontext ibucontext; + struct mlx4_uar uar; + struct list_head db_page_list; + struct mutex db_page_mutex; +}; + +struct mlx4_ib_pd { + struct ib_pd ibpd; + u32 pdn; +}; + +struct mlx4_ib_xrcd { + struct ib_xrcd ibxrcd; + u32 xrcdn; + struct ib_pd *pd; + struct ib_cq *cq; +}; + +struct mlx4_ib_cq_buf { + struct mlx4_buf buf; + struct mlx4_mtt mtt; +}; + +struct mlx4_ib_cq_resize { + struct mlx4_ib_cq_buf buf; + int cqe; +}; + +struct mlx4_ib_cq { + struct ib_cq ibcq; + struct mlx4_cq mcq; + struct mlx4_ib_cq_buf buf; + struct mlx4_ib_cq_resize *resize_buf; + struct mlx4_db db; + spinlock_t lock; + struct mutex resize_mutex; + struct ib_umem *umem; + struct ib_umem *resize_umem; +}; + +struct mlx4_ib_mr { + struct ib_mr ibmr; + struct mlx4_mr mmr; + struct ib_umem *umem; +}; + +struct mlx4_ib_fast_reg_page_list { + struct ib_fast_reg_page_list ibfrpl; + __be64 *mapped_page_list; + dma_addr_t map; +}; + +struct mlx4_ib_fmr { + struct ib_fmr ibfmr; + struct mlx4_fmr mfmr; +}; + +struct mlx4_ib_wq { + u64 *wrid; + spinlock_t lock; + int wqe_cnt; + int max_post; + int max_gs; + int offset; + int wqe_shift; + unsigned head; + unsigned tail; +}; + +enum mlx4_ib_qp_flags { + MLX4_IB_QP_LSO = 1 << 0, + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1, +}; + +struct mlx4_ib_gid_entry { + struct list_head list; + union ib_gid gid; + int added; + u8 port; +}; + +struct mlx4_ib_qp { + struct ib_qp ibqp; + struct mlx4_qp mqp; + struct mlx4_buf buf; + + struct mlx4_db db; + struct mlx4_ib_wq rq; + + u32 doorbell_qpn; + __be32 sq_signal_bits; + unsigned sq_next_wqe; + int sq_max_wqes_per_wr; + int sq_spare_wqes; + struct mlx4_ib_wq sq; + + struct ib_umem *umem; + struct mlx4_mtt mtt; + int buf_size; + struct mutex mutex; + u16 xrcdn; + u32 flags; + u8 port; + u8 alt_port; + u8 atomic_rd_en; + u8 resp_depth; + u8 sq_no_prefetch; + u8 state; + int mlx_type; + struct list_head gid_list; +}; + +struct mlx4_ib_srq { + struct ib_srq ibsrq; + struct mlx4_srq msrq; + struct mlx4_buf buf; + struct mlx4_db db; + u64 *wrid; + spinlock_t lock; + int head; + int tail; + u16 wqe_ctr; + struct ib_umem *umem; + struct mlx4_mtt mtt; + struct mutex mutex; +}; + +struct mlx4_ib_ah { + struct ib_ah ibah; + union mlx4_ext_av av; +}; + +struct mlx4_ib_iboe { + spinlock_t lock; + struct net_device *netdevs[MLX4_MAX_PORTS]; + struct notifier_block nb; + union ib_gid gid_table[MLX4_MAX_PORTS][128]; +}; + +struct mlx4_ib_dev { + struct ib_device ib_dev; + struct mlx4_dev *dev; + int num_ports; + void __iomem *uar_map; + + struct mlx4_uar priv_uar; + u32 priv_pdn; + MLX4_DECLARE_DOORBELL_LOCK(uar_lock); + + struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2]; + struct ib_ah *sm_ah[MLX4_MAX_PORTS]; + spinlock_t sm_lock; + + struct mutex cap_mask_mutex; + bool ib_active; + struct mlx4_ib_iboe iboe; + int counters[MLX4_MAX_PORTS]; +}; + +static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct mlx4_ib_dev, ib_dev); +} + +static inline struct mlx4_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct mlx4_ib_ucontext, ibucontext); +} + +static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct mlx4_ib_pd, ibpd); +} + +static inline struct mlx4_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd) +{ + return container_of(ibxrcd, struct mlx4_ib_xrcd, ibxrcd); +} + +static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct mlx4_ib_cq, ibcq); +} + +static inline struct mlx4_ib_cq *to_mibcq(struct mlx4_cq *mcq) +{ + return container_of(mcq, struct mlx4_ib_cq, mcq); +} + +static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct mlx4_ib_mr, ibmr); +} + +static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) +{ + return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl); +} + +static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); +} +static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct mlx4_ib_qp, ibqp); +} + +static inline struct mlx4_ib_qp *to_mibqp(struct mlx4_qp *mqp) +{ + return container_of(mqp, struct mlx4_ib_qp, mqp); +} + +static inline struct mlx4_ib_srq *to_msrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct mlx4_ib_srq, ibsrq); +} + +static inline struct mlx4_ib_srq *to_mibsrq(struct mlx4_srq *msrq) +{ + return container_of(msrq, struct mlx4_ib_srq, msrq); +} + +static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah) +{ + return container_of(ibah, struct mlx4_ib_ah, ibah); +} + +int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, + struct mlx4_db *db); +void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db); + +struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc); +int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, + struct ib_umem *umem); +struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata); +int mlx4_ib_dereg_mr(struct ib_mr *mr); +struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len); +struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, + int page_list_len); +void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); + +int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); +int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); +struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, + struct ib_ucontext *context, + struct ib_udata *udata); +int mlx4_ib_destroy_cq(struct ib_cq *cq); +int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); +void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); +void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); + +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); +int mlx4_ib_destroy_ah(struct ib_ah *ah); + +struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); +int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); +int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); +int mlx4_ib_destroy_srq(struct ib_srq *srq); +void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); +int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); + +struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int mlx4_ib_destroy_qp(struct ib_qp *qp); +int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); +int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); + +int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, + void *in_mad, void *response_mad); +int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad); +int mlx4_ib_mad_init(struct mlx4_ib_dev *dev); +void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev); + +struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int mr_access_flags, + struct ib_fmr_attr *fmr_attr); +int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages, + u64 iova); +int mlx4_ib_unmap_fmr(struct list_head *fmr_list); +int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); + +int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, + u8 *mac, int *is_mcast, u8 port); + +static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) +{ + u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3; + + if (rdma_port_get_link_layer(ah->ibah.device, port) == IB_LINK_LAYER_ETHERNET) + return 1; + + return !!(ah->av.ib.g_slid & 0x80); +} + +int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, + union ib_gid *gid); + +#endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c new file mode 100644 index 00000000..dca55b19 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "mlx4_ib.h" + +static u32 convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) | + MLX4_PERM_LOCAL_READ; +} + +struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct mlx4_ib_mr *mr; + int err; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0, + ~0ull, convert_access(acc), 0, 0, &mr->mmr); + if (err) + goto err_free; + + err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr); + if (err) + goto err_mr; + + mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; + mr->umem = NULL; + + return &mr->ibmr; + +err_mr: + mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); + +err_free: + kfree(mr); + + return ERR_PTR(err); +} + +int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, + struct ib_umem *umem) +{ + u64 *pages; + struct ib_umem_chunk *chunk; + int i, j, k; + int n; + int len; + int err = 0; + + pages = (u64 *) __get_free_page(GFP_KERNEL); + if (!pages) + return -ENOMEM; + + i = n = 0; + + list_for_each_entry(chunk, &umem->chunk_list, list) + for (j = 0; j < chunk->nmap; ++j) { + len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(&chunk->page_list[j]) + + umem->page_size * k; + /* + * Be friendly to mlx4_write_mtt() and + * pass it chunks of appropriate size. + */ + if (i == PAGE_SIZE / sizeof (u64)) { + err = mlx4_write_mtt(dev->dev, mtt, n, + i, pages); + if (err) + goto out; + n += i; + i = 0; + } + } + } + + if (i) + err = mlx4_write_mtt(dev->dev, mtt, n, i, pages); + +out: + free_page((unsigned long) pages); + return err; +} + +struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_mr *mr; + int shift; + int err; + int n; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->umem = ib_umem_get(pd->uobject->context, start, length, + access_flags, 0); + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + goto err_free; + } + + n = ib_umem_page_count(mr->umem); + shift = ilog2(mr->umem->page_size); + + err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length, + convert_access(access_flags), n, shift, &mr->mmr); + if (err) + goto err_umem; + + err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem); + if (err) + goto err_mr; + + err = mlx4_mr_enable(dev->dev, &mr->mmr); + if (err) + goto err_mr; + + mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; + + return &mr->ibmr; + +err_mr: + mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); + +err_umem: + ib_umem_release(mr->umem); + +err_free: + kfree(mr); + + return ERR_PTR(err); +} + +int mlx4_ib_dereg_mr(struct ib_mr *ibmr) +{ + struct mlx4_ib_mr *mr = to_mmr(ibmr); + + mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); + if (mr->umem) + ib_umem_release(mr->umem); + kfree(mr); + + return 0; +} + +struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_mr *mr; + int err; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0, + max_page_list_len, 0, &mr->mmr); + if (err) + goto err_free; + + err = mlx4_mr_enable(dev->dev, &mr->mmr); + if (err) + goto err_mr; + + mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; + mr->umem = NULL; + + return &mr->ibmr; + +err_mr: + mlx4_mr_free(dev->dev, &mr->mmr); + +err_free: + kfree(mr); + return ERR_PTR(err); +} + +struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, + int page_list_len) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct mlx4_ib_fast_reg_page_list *mfrpl; + int size = page_list_len * sizeof (u64); + + if (page_list_len > MLX4_MAX_FAST_REG_PAGES) + return ERR_PTR(-EINVAL); + + mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL); + if (!mfrpl) + return ERR_PTR(-ENOMEM); + + mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL); + if (!mfrpl->ibfrpl.page_list) + goto err_free; + + mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev, + size, &mfrpl->map, + GFP_KERNEL); + if (!mfrpl->mapped_page_list) + goto err_free; + + WARN_ON(mfrpl->map & 0x3f); + + return &mfrpl->ibfrpl; + +err_free: + kfree(mfrpl->ibfrpl.page_list); + kfree(mfrpl); + return ERR_PTR(-ENOMEM); +} + +void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) +{ + struct mlx4_ib_dev *dev = to_mdev(page_list->device); + struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); + int size = page_list->max_page_list_len * sizeof (u64); + + dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list, + mfrpl->map); + kfree(mfrpl->ibfrpl.page_list); + kfree(mfrpl); +} + +struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, + struct ib_fmr_attr *fmr_attr) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_fmr *fmr; + int err = -ENOMEM; + + fmr = kmalloc(sizeof *fmr, GFP_KERNEL); + if (!fmr) + return ERR_PTR(-ENOMEM); + + err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc), + fmr_attr->max_pages, fmr_attr->max_maps, + fmr_attr->page_shift, &fmr->mfmr); + if (err) + goto err_free; + + err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr); + if (err) + goto err_mr; + + fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key; + + return &fmr->ibfmr; + +err_mr: + mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr); + +err_free: + kfree(fmr); + + return ERR_PTR(err); +} + +int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int npages, u64 iova) +{ + struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr); + struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device); + + return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova, + &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey); +} + +int mlx4_ib_unmap_fmr(struct list_head *fmr_list) +{ + struct ib_fmr *ibfmr; + int err; + struct mlx4_dev *mdev = NULL; + + list_for_each_entry(ibfmr, fmr_list, list) { + if (mdev && to_mdev(ibfmr->device)->dev != mdev) + return -EINVAL; + mdev = to_mdev(ibfmr->device)->dev; + } + + if (!mdev) + return 0; + + list_for_each_entry(ibfmr, fmr_list, list) { + struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr); + + mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey); + } + + /* + * Make sure all MPT status updates are visible before issuing + * SYNC_TPT firmware command. + */ + wmb(); + + err = mlx4_SYNC_TPT(mdev); + if (err) + printk(KERN_WARNING "mlx4_ib: SYNC_TPT error %d when " + "unmapping FMRs\n", err); + + return 0; +} + +int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr) +{ + struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr); + struct mlx4_ib_dev *dev = to_mdev(ibfmr->device); + int err; + + err = mlx4_fmr_free(dev->dev, &ifmr->mfmr); + + if (!err) + kfree(ifmr); + + return err; +} diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c new file mode 100644 index 00000000..3a784896 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -0,0 +1,2175 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include +#include +#include + +#include + +#include "mlx4_ib.h" +#include "user.h" + +enum { + MLX4_IB_ACK_REQ_FREQ = 8, +}; + +enum { + MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83, + MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f, + MLX4_IB_LINK_TYPE_IB = 0, + MLX4_IB_LINK_TYPE_ETH = 1 +}; + +enum { + /* + * Largest possible UD header: send with GRH and immediate + * data plus 18 bytes for an Ethernet header with VLAN/802.1Q + * tag. (LRH would only use 8 bytes, so Ethernet is the + * biggest case) + */ + MLX4_IB_UD_HEADER_SIZE = 82, + MLX4_IB_LSO_HEADER_SPARE = 128, +}; + +enum { + MLX4_IB_IBOE_ETHERTYPE = 0x8915 +}; + +struct mlx4_ib_sqp { + struct mlx4_ib_qp qp; + int pkey_index; + u32 qkey; + u32 send_psn; + struct ib_ud_header ud_header; + u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; +}; + +enum { + MLX4_IB_MIN_SQ_STRIDE = 6, + MLX4_IB_CACHE_LINE_SIZE = 64, +}; + +static const __be32 mlx4_ib_opcode[] = { + [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), + [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO), + [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM), + [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), + [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), + [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ), + [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), + [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), + [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), + [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), + [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), + [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), + [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), +}; + +static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) +{ + return container_of(mqp, struct mlx4_ib_sqp, qp); +} + +static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + return qp->mqp.qpn >= dev->dev->caps.sqp_start && + qp->mqp.qpn <= dev->dev->caps.sqp_start + 3; +} + +static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + return qp->mqp.qpn >= dev->dev->caps.sqp_start && + qp->mqp.qpn <= dev->dev->caps.sqp_start + 1; +} + +static void *get_wqe(struct mlx4_ib_qp *qp, int offset) +{ + return mlx4_buf_offset(&qp->buf, offset); +} + +static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n) +{ + return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); +} + +static void *get_send_wqe(struct mlx4_ib_qp *qp, int n) +{ + return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift)); +} + +/* + * Stamp a SQ WQE so that it is invalid if prefetched by marking the + * first four bytes of every 64 byte chunk with + * 0x7FFFFFF | (invalid_ownership_value << 31). + * + * When the max work request size is less than or equal to the WQE + * basic block size, as an optimization, we can stamp all WQEs with + * 0xffffffff, and skip the very first chunk of each WQE. + */ +static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) +{ + __be32 *wqe; + int i; + int s; + int ind; + void *buf; + __be32 stamp; + struct mlx4_wqe_ctrl_seg *ctrl; + + if (qp->sq_max_wqes_per_wr > 1) { + s = roundup(size, 1U << qp->sq.wqe_shift); + for (i = 0; i < s; i += 64) { + ind = (i >> qp->sq.wqe_shift) + n; + stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) : + cpu_to_be32(0xffffffff); + buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); + wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1)); + *wqe = stamp; + } + } else { + ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); + s = (ctrl->fence_size & 0x3f) << 4; + for (i = 64; i < s; i += 64) { + wqe = buf + i; + *wqe = cpu_to_be32(0xffffffff); + } + } +} + +static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size) +{ + struct mlx4_wqe_ctrl_seg *ctrl; + struct mlx4_wqe_inline_seg *inl; + void *wqe; + int s; + + ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); + s = sizeof(struct mlx4_wqe_ctrl_seg); + + if (qp->ibqp.qp_type == IB_QPT_UD) { + struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl; + struct mlx4_av *av = (struct mlx4_av *)dgram->av; + memset(dgram, 0, sizeof *dgram); + av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn); + s += sizeof(struct mlx4_wqe_datagram_seg); + } + + /* Pad the remainder of the WQE with an inline data segment. */ + if (size > s) { + inl = wqe + s; + inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl)); + } + ctrl->srcrb_flags = 0; + ctrl->fence_size = size / 16; + /* + * Make sure descriptor is fully written before setting ownership bit + * (because HW can start executing as soon as we do). + */ + wmb(); + + ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) | + (n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0); + + stamp_send_wqe(qp, n + qp->sq_spare_wqes, size); +} + +/* Post NOP WQE to prevent wrap-around in the middle of WR */ +static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind) +{ + unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1)); + if (unlikely(s < qp->sq_max_wqes_per_wr)) { + post_nop_wqe(qp, ind, s << qp->sq.wqe_shift); + ind += s; + } + return ind; +} + +static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) +{ + struct ib_event event; + struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; + + if (type == MLX4_EVENT_TYPE_PATH_MIG) + to_mibqp(qp)->port = to_mibqp(qp)->alt_port; + + if (ibqp->event_handler) { + event.device = ibqp->device; + event.element.qp = ibqp; + switch (type) { + case MLX4_EVENT_TYPE_PATH_MIG: + event.event = IB_EVENT_PATH_MIG; + break; + case MLX4_EVENT_TYPE_COMM_EST: + event.event = IB_EVENT_COMM_EST; + break; + case MLX4_EVENT_TYPE_SQ_DRAINED: + event.event = IB_EVENT_SQ_DRAINED; + break; + case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE: + event.event = IB_EVENT_QP_LAST_WQE_REACHED; + break; + case MLX4_EVENT_TYPE_WQ_CATAS_ERROR: + event.event = IB_EVENT_QP_FATAL; + break; + case MLX4_EVENT_TYPE_PATH_MIG_FAILED: + event.event = IB_EVENT_PATH_MIG_ERR; + break; + case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + event.event = IB_EVENT_QP_REQ_ERR; + break; + case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR: + event.event = IB_EVENT_QP_ACCESS_ERR; + break; + default: + printk(KERN_WARNING "mlx4_ib: Unexpected event type %d " + "on QP %06x\n", type, qp->qpn); + return; + } + + ibqp->event_handler(&event, ibqp->qp_context); + } +} + +static int send_wqe_overhead(enum ib_qp_type type, u32 flags) +{ + /* + * UD WQEs must have a datagram segment. + * RC and UC WQEs might have a remote address segment. + * MLX WQEs need two extra inline data segments (for the UD + * header and space for the ICRC). + */ + switch (type) { + case IB_QPT_UD: + return sizeof (struct mlx4_wqe_ctrl_seg) + + sizeof (struct mlx4_wqe_datagram_seg) + + ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0); + case IB_QPT_UC: + return sizeof (struct mlx4_wqe_ctrl_seg) + + sizeof (struct mlx4_wqe_raddr_seg); + case IB_QPT_RC: + return sizeof (struct mlx4_wqe_ctrl_seg) + + sizeof (struct mlx4_wqe_atomic_seg) + + sizeof (struct mlx4_wqe_raddr_seg); + case IB_QPT_SMI: + case IB_QPT_GSI: + return sizeof (struct mlx4_wqe_ctrl_seg) + + ALIGN(MLX4_IB_UD_HEADER_SIZE + + DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, + MLX4_INLINE_ALIGN) * + sizeof (struct mlx4_wqe_inline_seg), + sizeof (struct mlx4_wqe_data_seg)) + + ALIGN(4 + + sizeof (struct mlx4_wqe_inline_seg), + sizeof (struct mlx4_wqe_data_seg)); + default: + return sizeof (struct mlx4_wqe_ctrl_seg); + } +} + +static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, + int is_user, int has_rq, struct mlx4_ib_qp *qp) +{ + /* Sanity check RQ size before proceeding */ + if (cap->max_recv_wr > dev->dev->caps.max_wqes || + cap->max_recv_sge > dev->dev->caps.max_rq_sg) + return -EINVAL; + + if (!has_rq) { + if (cap->max_recv_wr) + return -EINVAL; + + qp->rq.wqe_cnt = qp->rq.max_gs = 0; + } else { + /* HW requires >= 1 RQ entry with >= 1 gather entry */ + if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) + return -EINVAL; + + qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr)); + qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); + qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg)); + } + + cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt; + cap->max_recv_sge = qp->rq.max_gs; + + return 0; +} + +static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, + enum ib_qp_type type, struct mlx4_ib_qp *qp) +{ + int s; + + /* Sanity check SQ size before proceeding */ + if (cap->max_send_wr > dev->dev->caps.max_wqes || + cap->max_send_sge > dev->dev->caps.max_sq_sg || + cap->max_inline_data + send_wqe_overhead(type, qp->flags) + + sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) + return -EINVAL; + + /* + * For MLX transport we need 2 extra S/G entries: + * one for the header and one for the checksum at the end + */ + if ((type == IB_QPT_SMI || type == IB_QPT_GSI) && + cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) + return -EINVAL; + + s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg), + cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) + + send_wqe_overhead(type, qp->flags); + + if (s > dev->dev->caps.max_sq_desc_sz) + return -EINVAL; + + /* + * Hermon supports shrinking WQEs, such that a single work + * request can include multiple units of 1 << wqe_shift. This + * way, work requests can differ in size, and do not have to + * be a power of 2 in size, saving memory and speeding up send + * WR posting. Unfortunately, if we do this then the + * wqe_index field in CQEs can't be used to look up the WR ID + * anymore, so we do this only if selective signaling is off. + * + * Further, on 32-bit platforms, we can't use vmap() to make + * the QP buffer virtually contiguous. Thus we have to use + * constant-sized WRs to make sure a WR is always fully within + * a single page-sized chunk. + * + * Finally, we use NOP work requests to pad the end of the + * work queue, to avoid wrap-around in the middle of WR. We + * set NEC bit to avoid getting completions with error for + * these NOP WRs, but since NEC is only supported starting + * with firmware 2.2.232, we use constant-sized WRs for older + * firmware. + * + * And, since MLX QPs only support SEND, we use constant-sized + * WRs in this case. + * + * We look for the smallest value of wqe_shift such that the + * resulting number of wqes does not exceed device + * capabilities. + * + * We set WQE size to at least 64 bytes, this way stamping + * invalidates each WQE. + */ + if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && + qp->sq_signal_bits && BITS_PER_LONG == 64 && + type != IB_QPT_SMI && type != IB_QPT_GSI) + qp->sq.wqe_shift = ilog2(64); + else + qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); + + for (;;) { + qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift); + + /* + * We need to leave 2 KB + 1 WR of headroom in the SQ to + * allow HW to prefetch. + */ + qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr; + qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr * + qp->sq_max_wqes_per_wr + + qp->sq_spare_wqes); + + if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes) + break; + + if (qp->sq_max_wqes_per_wr <= 1) + return -EINVAL; + + ++qp->sq.wqe_shift; + } + + qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz, + (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) - + send_wqe_overhead(type, qp->flags)) / + sizeof (struct mlx4_wqe_data_seg); + + qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << qp->sq.wqe_shift); + if (qp->rq.wqe_shift > qp->sq.wqe_shift) { + qp->rq.offset = 0; + qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; + } else { + qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift; + qp->sq.offset = 0; + } + + cap->max_send_wr = qp->sq.max_post = + (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr; + cap->max_send_sge = min(qp->sq.max_gs, + min(dev->dev->caps.max_sq_sg, + dev->dev->caps.max_rq_sg)); + /* We don't support inline sends for kernel QPs (yet) */ + cap->max_inline_data = 0; + + return 0; +} + +static int set_user_sq_size(struct mlx4_ib_dev *dev, + struct mlx4_ib_qp *qp, + struct mlx4_ib_create_qp *ucmd) +{ + /* Sanity check SQ size before proceeding */ + if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes || + ucmd->log_sq_stride > + ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) || + ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE) + return -EINVAL; + + qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; + qp->sq.wqe_shift = ucmd->log_sq_stride; + + qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << qp->sq.wqe_shift); + + return 0; +} + +static int qp_has_rq(struct ib_qp_init_attr *attr) +{ + if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT) + return 0; + + return !attr->srq; +} + +static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp) +{ + int qpn; + int err; + + mutex_init(&qp->mutex); + spin_lock_init(&qp->sq.lock); + spin_lock_init(&qp->rq.lock); + INIT_LIST_HEAD(&qp->gid_list); + + qp->state = IB_QPS_RESET; + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) + qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); + + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp); + if (err) + goto err; + + if (pd->uobject) { + struct mlx4_ib_create_qp ucmd; + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { + err = -EFAULT; + goto err; + } + + qp->sq_no_prefetch = ucmd.sq_no_prefetch; + + err = set_user_sq_size(dev, qp, &ucmd); + if (err) + goto err; + + qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, + qp->buf_size, 0, 0); + if (IS_ERR(qp->umem)) { + err = PTR_ERR(qp->umem); + goto err; + } + + err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem), + ilog2(qp->umem->page_size), &qp->mtt); + if (err) + goto err_buf; + + err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem); + if (err) + goto err_mtt; + + if (qp_has_rq(init_attr)) { + err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), + ucmd.db_addr, &qp->db); + if (err) + goto err_mtt; + } + } else { + qp->sq_no_prefetch = 0; + + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) + qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; + + if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) + qp->flags |= MLX4_IB_QP_LSO; + + err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp); + if (err) + goto err; + + if (qp_has_rq(init_attr)) { + err = mlx4_db_alloc(dev->dev, &qp->db, 0); + if (err) + goto err; + + *qp->db.db = 0; + } + + if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) { + err = -ENOMEM; + goto err_db; + } + + err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift, + &qp->mtt); + if (err) + goto err_buf; + + err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf); + if (err) + goto err_mtt; + + qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL); + qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL); + + if (!qp->sq.wrid || !qp->rq.wrid) { + err = -ENOMEM; + goto err_wrid; + } + } + + if (sqpn) { + qpn = sqpn; + } else { + err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn); + if (err) + goto err_wrid; + } + + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); + if (err) + goto err_qpn; + + if (init_attr->qp_type == IB_QPT_XRC_TGT) + qp->mqp.qpn |= (1 << 23); + + /* + * Hardware wants QPN written in big-endian order (after + * shifting) for send doorbell. Precompute this value to save + * a little bit when posting sends. + */ + qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); + + qp->mqp.event = mlx4_ib_qp_event; + + return 0; + +err_qpn: + if (!sqpn) + mlx4_qp_release_range(dev->dev, qpn, 1); + +err_wrid: + if (pd->uobject) { + if (qp_has_rq(init_attr)) + mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); + } else { + kfree(qp->sq.wrid); + kfree(qp->rq.wrid); + } + +err_mtt: + mlx4_mtt_cleanup(dev->dev, &qp->mtt); + +err_buf: + if (pd->uobject) + ib_umem_release(qp->umem); + else + mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); + +err_db: + if (!pd->uobject && qp_has_rq(init_attr)) + mlx4_db_free(dev->dev, &qp->db); + +err: + return err; +} + +static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state) +{ + switch (state) { + case IB_QPS_RESET: return MLX4_QP_STATE_RST; + case IB_QPS_INIT: return MLX4_QP_STATE_INIT; + case IB_QPS_RTR: return MLX4_QP_STATE_RTR; + case IB_QPS_RTS: return MLX4_QP_STATE_RTS; + case IB_QPS_SQD: return MLX4_QP_STATE_SQD; + case IB_QPS_SQE: return MLX4_QP_STATE_SQER; + case IB_QPS_ERR: return MLX4_QP_STATE_ERR; + default: return -1; + } +} + +static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq) + __acquires(&send_cq->lock) __acquires(&recv_cq->lock) +{ + if (send_cq == recv_cq) { + spin_lock_irq(&send_cq->lock); + __acquire(&recv_cq->lock); + } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { + spin_lock_irq(&send_cq->lock); + spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock_irq(&recv_cq->lock); + spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); + } +} + +static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq) + __releases(&send_cq->lock) __releases(&recv_cq->lock) +{ + if (send_cq == recv_cq) { + __release(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { + spin_unlock(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else { + spin_unlock(&send_cq->lock); + spin_unlock_irq(&recv_cq->lock); + } +} + +static void del_gid_entries(struct mlx4_ib_qp *qp) +{ + struct mlx4_ib_gid_entry *ge, *tmp; + + list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { + list_del(&ge->list); + kfree(ge); + } +} + +static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp) +{ + if (qp->ibqp.qp_type == IB_QPT_XRC_TGT) + return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd); + else + return to_mpd(qp->ibqp.pd); +} + +static void get_cqs(struct mlx4_ib_qp *qp, + struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq) +{ + switch (qp->ibqp.qp_type) { + case IB_QPT_XRC_TGT: + *send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq); + *recv_cq = *send_cq; + break; + case IB_QPT_XRC_INI: + *send_cq = to_mcq(qp->ibqp.send_cq); + *recv_cq = *send_cq; + break; + default: + *send_cq = to_mcq(qp->ibqp.send_cq); + *recv_cq = to_mcq(qp->ibqp.recv_cq); + break; + } +} + +static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, + int is_user) +{ + struct mlx4_ib_cq *send_cq, *recv_cq; + + if (qp->state != IB_QPS_RESET) + if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state), + MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp)) + printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n", + qp->mqp.qpn); + + get_cqs(qp, &send_cq, &recv_cq); + + mlx4_ib_lock_cqs(send_cq, recv_cq); + + if (!is_user) { + __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL); + if (send_cq != recv_cq) + __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + } + + mlx4_qp_remove(dev->dev, &qp->mqp); + + mlx4_ib_unlock_cqs(send_cq, recv_cq); + + mlx4_qp_free(dev->dev, &qp->mqp); + + if (!is_sqp(dev, qp)) + mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); + + mlx4_mtt_cleanup(dev->dev, &qp->mtt); + + if (is_user) { + if (qp->rq.wqe_cnt) + mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context), + &qp->db); + ib_umem_release(qp->umem); + } else { + kfree(qp->sq.wrid); + kfree(qp->rq.wrid); + mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); + if (qp->rq.wqe_cnt) + mlx4_db_free(dev->dev, &qp->db); + } + + del_gid_entries(qp); +} + +struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx4_ib_sqp *sqp; + struct mlx4_ib_qp *qp; + int err; + u16 xrcdn = 0; + + /* + * We only support LSO and multicast loopback blocking, and + * only for kernel UD QPs. + */ + if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO | + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) + return ERR_PTR(-EINVAL); + + if (init_attr->create_flags && + (udata || init_attr->qp_type != IB_QPT_UD)) + return ERR_PTR(-EINVAL); + + switch (init_attr->qp_type) { + case IB_QPT_XRC_TGT: + pd = to_mxrcd(init_attr->xrcd)->pd; + xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; + init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq; + /* fall through */ + case IB_QPT_XRC_INI: + if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return ERR_PTR(-ENOSYS); + init_attr->recv_cq = init_attr->send_cq; + /* fall through */ + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + { + qp = kzalloc(sizeof *qp, GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 0, qp); + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + qp->ibqp.qp_num = qp->mqp.qpn; + qp->xrcdn = xrcdn; + + break; + } + case IB_QPT_SMI: + case IB_QPT_GSI: + { + /* Userspace is not allowed to create special QPs: */ + if (udata) + return ERR_PTR(-EINVAL); + + sqp = kzalloc(sizeof *sqp, GFP_KERNEL); + if (!sqp) + return ERR_PTR(-ENOMEM); + + qp = &sqp->qp; + + err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, + to_mdev(pd->device)->dev->caps.sqp_start + + (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) + + init_attr->port_num - 1, + qp); + if (err) { + kfree(sqp); + return ERR_PTR(err); + } + + qp->port = init_attr->port_num; + qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; + + break; + } + default: + /* Don't support raw QPs */ + return ERR_PTR(-EINVAL); + } + + return &qp->ibqp; +} + +int mlx4_ib_destroy_qp(struct ib_qp *qp) +{ + struct mlx4_ib_dev *dev = to_mdev(qp->device); + struct mlx4_ib_qp *mqp = to_mqp(qp); + struct mlx4_ib_pd *pd; + + if (is_qp0(dev, mqp)) + mlx4_CLOSE_PORT(dev->dev, mqp->port); + + pd = get_pd(mqp); + destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); + + if (is_sqp(dev, mqp)) + kfree(to_msqp(mqp)); + else + kfree(mqp); + + return 0; +} + +static int to_mlx4_st(enum ib_qp_type type) +{ + switch (type) { + case IB_QPT_RC: return MLX4_QP_ST_RC; + case IB_QPT_UC: return MLX4_QP_ST_UC; + case IB_QPT_UD: return MLX4_QP_ST_UD; + case IB_QPT_XRC_INI: + case IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC; + case IB_QPT_SMI: + case IB_QPT_GSI: return MLX4_QP_ST_MLX; + default: return -1; + } +} + +static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr, + int attr_mask) +{ + u8 dest_rd_atomic; + u32 access_flags; + u32 hw_access_flags = 0; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + dest_rd_atomic = attr->max_dest_rd_atomic; + else + dest_rd_atomic = qp->resp_depth; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + access_flags = attr->qp_access_flags; + else + access_flags = qp->atomic_rd_en; + + if (!dest_rd_atomic) + access_flags &= IB_ACCESS_REMOTE_WRITE; + + if (access_flags & IB_ACCESS_REMOTE_READ) + hw_access_flags |= MLX4_QP_BIT_RRE; + if (access_flags & IB_ACCESS_REMOTE_ATOMIC) + hw_access_flags |= MLX4_QP_BIT_RAE; + if (access_flags & IB_ACCESS_REMOTE_WRITE) + hw_access_flags |= MLX4_QP_BIT_RWE; + + return cpu_to_be32(hw_access_flags); +} + +static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr, + int attr_mask) +{ + if (attr_mask & IB_QP_PKEY_INDEX) + sqp->pkey_index = attr->pkey_index; + if (attr_mask & IB_QP_QKEY) + sqp->qkey = attr->qkey; + if (attr_mask & IB_QP_SQ_PSN) + sqp->send_psn = attr->sq_psn; +} + +static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port) +{ + path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6); +} + +static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, + struct mlx4_qp_path *path, u8 port) +{ + int err; + int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) == + IB_LINK_LAYER_ETHERNET; + u8 mac[6]; + int is_mcast; + u16 vlan_tag; + int vidx; + + path->grh_mylmc = ah->src_path_bits & 0x7f; + path->rlid = cpu_to_be16(ah->dlid); + if (ah->static_rate) { + path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET; + while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << path->static_rate & dev->dev->caps.stat_rate_support)) + --path->static_rate; + } else + path->static_rate = 0; + + if (ah->ah_flags & IB_AH_GRH) { + if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) { + printk(KERN_ERR "sgid_index (%u) too large. max is %d\n", + ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1); + return -1; + } + + path->grh_mylmc |= 1 << 7; + path->mgid_index = ah->grh.sgid_index; + path->hop_limit = ah->grh.hop_limit; + path->tclass_flowlabel = + cpu_to_be32((ah->grh.traffic_class << 20) | + (ah->grh.flow_label)); + memcpy(path->rgid, ah->grh.dgid.raw, 16); + } + + if (is_eth) { + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 7) << 3); + + if (!(ah->ah_flags & IB_AH_GRH)) + return -1; + + err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port); + if (err) + return err; + + memcpy(path->dmac, mac, 6); + path->ackto = MLX4_IB_LINK_TYPE_ETH; + /* use index 0 into MAC table for IBoE */ + path->grh_mylmc &= 0x80; + + vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]); + if (vlan_tag < 0x1000) { + if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx)) + return -ENOENT; + + path->vlan_index = vidx; + path->fl = 1 << 6; + } + } else + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 0xf) << 2); + + return 0; +} + +static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + struct mlx4_ib_gid_entry *ge, *tmp; + + list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { + if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) { + ge->added = 1; + ge->port = qp->port; + } + } +} + +static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, int attr_mask, + enum ib_qp_state cur_state, enum ib_qp_state new_state) +{ + struct mlx4_ib_dev *dev = to_mdev(ibqp->device); + struct mlx4_ib_qp *qp = to_mqp(ibqp); + struct mlx4_ib_pd *pd; + struct mlx4_ib_cq *send_cq, *recv_cq; + struct mlx4_qp_context *context; + enum mlx4_qp_optpar optpar = 0; + int sqd_event; + int err = -EINVAL; + + context = kzalloc(sizeof *context, GFP_KERNEL); + if (!context) + return -ENOMEM; + + context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | + (to_mlx4_st(ibqp->qp_type) << 16)); + + if (!(attr_mask & IB_QP_PATH_MIG_STATE)) + context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); + else { + optpar |= MLX4_QP_OPTPAR_PM_STATE; + switch (attr->path_mig_state) { + case IB_MIG_MIGRATED: + context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); + break; + case IB_MIG_REARM: + context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11); + break; + case IB_MIG_ARMED: + context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11); + break; + } + } + + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) + context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; + else if (ibqp->qp_type == IB_QPT_UD) { + if (qp->flags & MLX4_IB_QP_LSO) + context->mtu_msgmax = (IB_MTU_4096 << 5) | + ilog2(dev->dev->caps.max_gso_sz); + else + context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; + } else if (attr_mask & IB_QP_PATH_MTU) { + if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { + printk(KERN_ERR "path MTU (%u) is invalid\n", + attr->path_mtu); + goto out; + } + context->mtu_msgmax = (attr->path_mtu << 5) | + ilog2(dev->dev->caps.max_msg_sz); + } + + if (qp->rq.wqe_cnt) + context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3; + context->rq_size_stride |= qp->rq.wqe_shift - 4; + + if (qp->sq.wqe_cnt) + context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; + context->sq_size_stride |= qp->sq.wqe_shift - 4; + + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + context->sq_size_stride |= !!qp->sq_no_prefetch << 7; + context->xrcd = cpu_to_be32((u32) qp->xrcdn); + } + + if (qp->ibqp.uobject) + context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index); + else + context->usr_page = cpu_to_be32(dev->priv_uar.index); + + if (attr_mask & IB_QP_DEST_QPN) + context->remote_qpn = cpu_to_be32(attr->dest_qp_num); + + if (attr_mask & IB_QP_PORT) { + if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD && + !(attr_mask & IB_QP_AV)) { + mlx4_set_sched(&context->pri_path, attr->port_num); + optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE; + } + } + + if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { + if (dev->counters[qp->port - 1] != -1) { + context->pri_path.counter_index = + dev->counters[qp->port - 1]; + optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX; + } else + context->pri_path.counter_index = 0xff; + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + context->pri_path.pkey_index = attr->pkey_index; + optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; + } + + if (attr_mask & IB_QP_AV) { + if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, + attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) + goto out; + + optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | + MLX4_QP_OPTPAR_SCHED_QUEUE); + } + + if (attr_mask & IB_QP_TIMEOUT) { + context->pri_path.ackto |= attr->timeout << 3; + optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT; + } + + if (attr_mask & IB_QP_ALT_PATH) { + if (attr->alt_port_num == 0 || + attr->alt_port_num > dev->dev->caps.num_ports) + goto out; + + if (attr->alt_pkey_index >= + dev->dev->caps.pkey_table_len[attr->alt_port_num]) + goto out; + + if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path, + attr->alt_port_num)) + goto out; + + context->alt_path.pkey_index = attr->alt_pkey_index; + context->alt_path.ackto = attr->alt_timeout << 3; + optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; + } + + pd = get_pd(qp); + get_cqs(qp, &send_cq, &recv_cq); + context->pd = cpu_to_be32(pd->pdn); + context->cqn_send = cpu_to_be32(send_cq->mcq.cqn); + context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn); + context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); + + /* Set "fast registration enabled" for all kernel QPs */ + if (!qp->ibqp.uobject) + context->params1 |= cpu_to_be32(1 << 11); + + if (attr_mask & IB_QP_RNR_RETRY) { + context->params1 |= cpu_to_be32(attr->rnr_retry << 13); + optpar |= MLX4_QP_OPTPAR_RNR_RETRY; + } + + if (attr_mask & IB_QP_RETRY_CNT) { + context->params1 |= cpu_to_be32(attr->retry_cnt << 16); + optpar |= MLX4_QP_OPTPAR_RETRY_COUNT; + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + if (attr->max_rd_atomic) + context->params1 |= + cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); + optpar |= MLX4_QP_OPTPAR_SRA_MAX; + } + + if (attr_mask & IB_QP_SQ_PSN) + context->next_send_psn = cpu_to_be32(attr->sq_psn); + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + if (attr->max_dest_rd_atomic) + context->params2 |= + cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); + optpar |= MLX4_QP_OPTPAR_RRA_MAX; + } + + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { + context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask); + optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE; + } + + if (ibqp->srq) + context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC); + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); + optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT; + } + if (attr_mask & IB_QP_RQ_PSN) + context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); + + if (attr_mask & IB_QP_QKEY) { + context->qkey = cpu_to_be32(attr->qkey); + optpar |= MLX4_QP_OPTPAR_Q_KEY; + } + + if (ibqp->srq) + context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); + + if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + context->db_rec_addr = cpu_to_be64(qp->db.dma); + + if (cur_state == IB_QPS_INIT && + new_state == IB_QPS_RTR && + (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || + ibqp->qp_type == IB_QPT_UD)) { + context->pri_path.sched_queue = (qp->port - 1) << 6; + if (is_qp0(dev, qp)) + context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; + else + context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; + } + + if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && + attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) + sqd_event = 1; + else + sqd_event = 0; + + if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + context->rlkey |= (1 << 4); + + /* + * Before passing a kernel QP to the HW, make sure that the + * ownership bits of the send queue are set and the SQ + * headroom is stamped so that the hardware doesn't start + * processing stale work requests. + */ + if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + struct mlx4_wqe_ctrl_seg *ctrl; + int i; + + for (i = 0; i < qp->sq.wqe_cnt; ++i) { + ctrl = get_send_wqe(qp, i); + ctrl->owner_opcode = cpu_to_be32(1 << 31); + if (qp->sq_max_wqes_per_wr == 1) + ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4); + + stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); + } + } + + err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state), + to_mlx4_state(new_state), context, optpar, + sqd_event, &qp->mqp); + if (err) + goto out; + + qp->state = new_state; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->atomic_rd_en = attr->qp_access_flags; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->resp_depth = attr->max_dest_rd_atomic; + if (attr_mask & IB_QP_PORT) { + qp->port = attr->port_num; + update_mcg_macs(dev, qp); + } + if (attr_mask & IB_QP_ALT_PATH) + qp->alt_port = attr->alt_port_num; + + if (is_sqp(dev, qp)) + store_sqp_attrs(to_msqp(qp), attr, attr_mask); + + /* + * If we moved QP0 to RTR, bring the IB link up; if we moved + * QP0 to RESET or ERROR, bring the link back down. + */ + if (is_qp0(dev, qp)) { + if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR) + if (mlx4_INIT_PORT(dev->dev, qp->port)) + printk(KERN_WARNING "INIT_PORT failed for port %d\n", + qp->port); + + if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR && + (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) + mlx4_CLOSE_PORT(dev->dev, qp->port); + } + + /* + * If we moved a kernel QP to RESET, clean up all old CQ + * entries and reinitialize the QP. + */ + if (new_state == IB_QPS_RESET && !ibqp->uobject) { + mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, + ibqp->srq ? to_msrq(ibqp->srq): NULL); + if (send_cq != recv_cq) + mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + + qp->rq.head = 0; + qp->rq.tail = 0; + qp->sq.head = 0; + qp->sq.tail = 0; + qp->sq_next_wqe = 0; + if (qp->rq.wqe_cnt) + *qp->db.db = 0; + } + +out: + kfree(context); + return err; +} + +int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(ibqp->device); + struct mlx4_ib_qp *qp = to_mqp(ibqp); + enum ib_qp_state cur_state, new_state; + int err = -EINVAL; + + mutex_lock(&qp->mutex); + + cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) + goto out; + + if ((attr_mask & IB_QP_PORT) && + (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { + goto out; + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) + goto out; + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) { + goto out; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) { + goto out; + } + + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + err = 0; + goto out; + } + + err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + +out: + mutex_unlock(&qp->mutex); + return err; +} + +static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, + void *wqe, unsigned *mlx_seg_len) +{ + struct ib_device *ib_dev = sqp->qp.ibqp.device; + struct mlx4_wqe_mlx_seg *mlx = wqe; + struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; + struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + union ib_gid sgid; + u16 pkey; + int send_size; + int header_size; + int spc; + int i; + int is_eth; + int is_vlan = 0; + int is_grh; + u16 vlan; + + send_size = 0; + for (i = 0; i < wr->num_sge; ++i) + send_size += wr->sg_list[i].length; + + is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; + is_grh = mlx4_ib_ah_grh_present(ah); + if (is_eth) { + ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sgid); + vlan = rdma_get_vlan_id(&sgid); + is_vlan = vlan < 0x1000; + } + ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header); + + if (!is_eth) { + sqp->ud_header.lrh.service_level = + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; + sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid; + sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); + } + + if (is_grh) { + sqp->ud_header.grh.traffic_class = + (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; + sqp->ud_header.grh.flow_label = + ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); + sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; + ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid); + memcpy(sqp->ud_header.grh.destination_gid.raw, + ah->av.ib.dgid, 16); + } + + mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); + + if (!is_eth) { + mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | + (sqp->ud_header.lrh.destination_lid == + IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | + (sqp->ud_header.lrh.service_level << 8)); + mlx->rlid = sqp->ud_header.lrh.destination_lid; + } + + switch (wr->opcode) { + case IB_WR_SEND: + sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; + sqp->ud_header.immediate_present = 0; + break; + case IB_WR_SEND_WITH_IMM: + sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; + sqp->ud_header.immediate_present = 1; + sqp->ud_header.immediate_data = wr->ex.imm_data; + break; + default: + return -EINVAL; + } + + if (is_eth) { + u8 *smac; + + memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); + /* FIXME: cache smac value? */ + smac = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]->dev_addr; + memcpy(sqp->ud_header.eth.smac_h, smac, 6); + if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) + mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); + if (!is_vlan) { + sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + } else { + u16 pcp; + + sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; + sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); + } + } else { + sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; + if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) + sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; + } + sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); + if (!sqp->qp.ibqp.qp_num) + ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); + else + ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey); + sqp->ud_header.bth.pkey = cpu_to_be16(pkey); + sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); + sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? + sqp->qkey : wr->wr.ud.remote_qkey); + sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); + + header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); + + if (0) { + printk(KERN_ERR "built UD header of size %d:\n", header_size); + for (i = 0; i < header_size / 4; ++i) { + if (i % 8 == 0) + printk(" [%02x] ", i * 4); + printk(" %08x", + be32_to_cpu(((__be32 *) sqp->header_buf)[i])); + if ((i + 1) % 8 == 0) + printk("\n"); + } + printk("\n"); + } + + /* + * Inline data segments may not cross a 64 byte boundary. If + * our UD header is bigger than the space available up to the + * next 64 byte boundary in the WQE, use two inline data + * segments to hold the UD header. + */ + spc = MLX4_INLINE_ALIGN - + ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); + if (header_size <= spc) { + inl->byte_count = cpu_to_be32(1 << 31 | header_size); + memcpy(inl + 1, sqp->header_buf, header_size); + i = 1; + } else { + inl->byte_count = cpu_to_be32(1 << 31 | spc); + memcpy(inl + 1, sqp->header_buf, spc); + + inl = (void *) (inl + 1) + spc; + memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); + /* + * Need a barrier here to make sure all the data is + * visible before the byte_count field is set. + * Otherwise the HCA prefetcher could grab the 64-byte + * chunk with this inline segment and get a valid (!= + * 0xffffffff) byte count but stale data, and end up + * generating a packet with bad headers. + * + * The first inline segment's byte_count field doesn't + * need a barrier, because it comes after a + * control/MLX segment and therefore is at an offset + * of 16 mod 64. + */ + wmb(); + inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc)); + i = 2; + } + + *mlx_seg_len = + ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); + return 0; +} + +static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq) +{ + unsigned cur; + struct mlx4_ib_cq *cq; + + cur = wq->head - wq->tail; + if (likely(cur + nreq < wq->max_post)) + return 0; + + cq = to_mcq(ib_cq); + spin_lock(&cq->lock); + cur = wq->head - wq->tail; + spin_unlock(&cq->lock); + + return cur + nreq >= wq->max_post; +} + +static __be32 convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC) : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) | + (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ) : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | + cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); +} + +static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) +{ + struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); + int i; + + for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i) + mfrpl->mapped_page_list[i] = + cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] | + MLX4_MTT_FLAG_PRESENT); + + fseg->flags = convert_access(wr->wr.fast_reg.access_flags); + fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey); + fseg->buf_list = cpu_to_be64(mfrpl->map); + fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); + fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length); + fseg->offset = 0; /* XXX -- is this just for ZBVA? */ + fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift); + fseg->reserved[0] = 0; + fseg->reserved[1] = 0; +} + +static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) +{ + iseg->flags = 0; + iseg->mem_key = cpu_to_be32(rkey); + iseg->guest_id = 0; + iseg->pa = 0; +} + +static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, + u64 remote_addr, u32 rkey) +{ + rseg->raddr = cpu_to_be64(remote_addr); + rseg->rkey = cpu_to_be32(rkey); + rseg->reserved = 0; +} + +static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr) +{ + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask); + } else { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = 0; + } + +} + +static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, + struct ib_send_wr *wr) +{ + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask); +} + +static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, + struct ib_send_wr *wr) +{ + memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); + dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); + dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; + memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); +} + +static void set_mlx_icrc_seg(void *dseg) +{ + u32 *t = dseg; + struct mlx4_wqe_inline_seg *iseg = dseg; + + t[1] = 0; + + /* + * Need a barrier here before writing the byte_count field to + * make sure that all the data is visible before the + * byte_count field is set. Otherwise, if the segment begins + * a new cacheline, the HCA prefetcher could grab the 64-byte + * chunk and get a valid (!= * 0xffffffff) byte count but + * stale data, and end up sending the wrong data. + */ + wmb(); + + iseg->byte_count = cpu_to_be32((1 << 31) | 4); +} + +static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) +{ + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->addr); + + /* + * Need a barrier here before writing the byte_count field to + * make sure that all the data is visible before the + * byte_count field is set. Otherwise, if the segment begins + * a new cacheline, the HCA prefetcher could grab the 64-byte + * chunk and get a valid (!= * 0xffffffff) byte count but + * stale data, and end up sending the wrong data. + */ + wmb(); + + dseg->byte_count = cpu_to_be32(sg->length); +} + +static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) +{ + dseg->byte_count = cpu_to_be32(sg->length); + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->addr); +} + +static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, + struct mlx4_ib_qp *qp, unsigned *lso_seg_len, + __be32 *lso_hdr_sz, __be32 *blh) +{ + unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); + + if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE)) + *blh = cpu_to_be32(1 << 6); + + if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && + wr->num_sge > qp->sq.max_gs - (halign >> 4))) + return -EINVAL; + + memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); + + *lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 | + wr->wr.ud.hlen); + *lso_seg_len = halign; + return 0; +} + +static __be32 send_ieth(struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return wr->ex.imm_data; + + case IB_WR_SEND_WITH_INV: + return cpu_to_be32(wr->ex.invalidate_rkey); + + default: + return 0; + } +} + +int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct mlx4_ib_qp *qp = to_mqp(ibqp); + void *wqe; + struct mlx4_wqe_ctrl_seg *ctrl; + struct mlx4_wqe_data_seg *dseg; + unsigned long flags; + int nreq; + int err = 0; + unsigned ind; + int uninitialized_var(stamp); + int uninitialized_var(size); + unsigned uninitialized_var(seglen); + __be32 dummy; + __be32 *lso_wqe; + __be32 uninitialized_var(lso_hdr_sz); + __be32 blh; + int i; + + spin_lock_irqsave(&qp->sq.lock, flags); + + ind = qp->sq_next_wqe; + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + lso_wqe = &dummy; + blh = 0; + + if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > qp->sq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); + qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id; + + ctrl->srcrb_flags = + (wr->send_flags & IB_SEND_SIGNALED ? + cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) | + (wr->send_flags & IB_SEND_SOLICITED ? + cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) | + ((wr->send_flags & IB_SEND_IP_CSUM) ? + cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM | + MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) | + qp->sq_signal_bits; + + ctrl->imm = send_ieth(wr); + + wqe += sizeof *ctrl; + size = sizeof *ctrl / 16; + + switch (ibqp->qp_type) { + case IB_QPT_RC: + case IB_QPT_UC: + switch (wr->opcode) { + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); + wqe += sizeof (struct mlx4_wqe_raddr_seg); + + set_atomic_seg(wqe, wr); + wqe += sizeof (struct mlx4_wqe_atomic_seg); + + size += (sizeof (struct mlx4_wqe_raddr_seg) + + sizeof (struct mlx4_wqe_atomic_seg)) / 16; + + break; + + case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); + wqe += sizeof (struct mlx4_wqe_raddr_seg); + + set_masked_atomic_seg(wqe, wr); + wqe += sizeof (struct mlx4_wqe_masked_atomic_seg); + + size += (sizeof (struct mlx4_wqe_raddr_seg) + + sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16; + + break; + + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mlx4_wqe_raddr_seg); + size += sizeof (struct mlx4_wqe_raddr_seg) / 16; + break; + + case IB_WR_LOCAL_INV: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); + set_local_inv_seg(wqe, wr->ex.invalidate_rkey); + wqe += sizeof (struct mlx4_wqe_local_inval_seg); + size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; + break; + + case IB_WR_FAST_REG_MR: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); + set_fmr_seg(wqe, wr); + wqe += sizeof (struct mlx4_wqe_fmr_seg); + size += sizeof (struct mlx4_wqe_fmr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + break; + + case IB_QPT_UD: + set_datagram_seg(wqe, wr); + wqe += sizeof (struct mlx4_wqe_datagram_seg); + size += sizeof (struct mlx4_wqe_datagram_seg) / 16; + + if (wr->opcode == IB_WR_LSO) { + err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh); + if (unlikely(err)) { + *bad_wr = wr; + goto out; + } + lso_wqe = (__be32 *) wqe; + wqe += seglen; + size += seglen / 16; + } + break; + + case IB_QPT_SMI: + case IB_QPT_GSI: + err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); + if (unlikely(err)) { + *bad_wr = wr; + goto out; + } + wqe += seglen; + size += seglen / 16; + break; + + default: + break; + } + + /* + * Write data segments in reverse order, so as to + * overwrite cacheline stamp last within each + * cacheline. This avoids issues with WQE + * prefetching. + */ + + dseg = wqe; + dseg += wr->num_sge - 1; + size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16); + + /* Add one more inline data segment for ICRC for MLX sends */ + if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI)) { + set_mlx_icrc_seg(dseg + 1); + size += sizeof (struct mlx4_wqe_data_seg) / 16; + } + + for (i = wr->num_sge - 1; i >= 0; --i, --dseg) + set_data_seg(dseg, wr->sg_list + i); + + /* + * Possibly overwrite stamping in cacheline with LSO + * segment only after making sure all data segments + * are written. + */ + wmb(); + *lso_wqe = lso_hdr_sz; + + ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? + MLX4_WQE_CTRL_FENCE : 0) | size; + + /* + * Make sure descriptor is fully written before + * setting ownership bit (because HW can start + * executing as soon as we do). + */ + wmb(); + + if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) { + *bad_wr = wr; + err = -EINVAL; + goto out; + } + + ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | + (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; + + stamp = ind + qp->sq_spare_wqes; + ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); + + /* + * We can improve latency by not stamping the last + * send queue WQE until after ringing the doorbell, so + * only stamp here if there are still more WQEs to post. + * + * Same optimization applies to padding with NOP wqe + * in case of WQE shrinking (used to prevent wrap-around + * in the middle of WR). + */ + if (wr->next) { + stamp_send_wqe(qp, stamp, size * 16); + ind = pad_wraparound(qp, ind); + } + } + +out: + if (likely(nreq)) { + qp->sq.head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + writel(qp->doorbell_qpn, + to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL); + + /* + * Make sure doorbells don't leak out of SQ spinlock + * and reach the HCA out of order. + */ + mmiowb(); + + stamp_send_wqe(qp, stamp, size * 16); + + ind = pad_wraparound(qp, ind); + qp->sq_next_wqe = ind; + } + + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return err; +} + +int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mlx4_ib_qp *qp = to_mqp(ibqp); + struct mlx4_wqe_data_seg *scat; + unsigned long flags; + int err = 0; + int nreq; + int ind; + int i; + + spin_lock_irqsave(&qp->rq.lock, flags); + + ind = qp->rq.head & (qp->rq.wqe_cnt - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > qp->rq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + scat = get_recv_wqe(qp, ind); + + for (i = 0; i < wr->num_sge; ++i) + __set_data_seg(scat + i, wr->sg_list + i); + + if (i < qp->rq.max_gs) { + scat[i].byte_count = 0; + scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); + scat[i].addr = 0; + } + + qp->rq.wrid[ind] = wr->wr_id; + + ind = (ind + 1) & (qp->rq.wqe_cnt - 1); + } + +out: + if (likely(nreq)) { + qp->rq.head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff); + } + + spin_unlock_irqrestore(&qp->rq.lock, flags); + + return err; +} + +static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state) +{ + switch (mlx4_state) { + case MLX4_QP_STATE_RST: return IB_QPS_RESET; + case MLX4_QP_STATE_INIT: return IB_QPS_INIT; + case MLX4_QP_STATE_RTR: return IB_QPS_RTR; + case MLX4_QP_STATE_RTS: return IB_QPS_RTS; + case MLX4_QP_STATE_SQ_DRAINING: + case MLX4_QP_STATE_SQD: return IB_QPS_SQD; + case MLX4_QP_STATE_SQER: return IB_QPS_SQE; + case MLX4_QP_STATE_ERR: return IB_QPS_ERR; + default: return -1; + } +} + +static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state) +{ + switch (mlx4_mig_state) { + case MLX4_QP_PM_ARMED: return IB_MIG_ARMED; + case MLX4_QP_PM_REARM: return IB_MIG_REARM; + case MLX4_QP_PM_MIGRATED: return IB_MIG_MIGRATED; + default: return -1; + } +} + +static int to_ib_qp_access_flags(int mlx4_flags) +{ + int ib_flags = 0; + + if (mlx4_flags & MLX4_QP_BIT_RRE) + ib_flags |= IB_ACCESS_REMOTE_READ; + if (mlx4_flags & MLX4_QP_BIT_RWE) + ib_flags |= IB_ACCESS_REMOTE_WRITE; + if (mlx4_flags & MLX4_QP_BIT_RAE) + ib_flags |= IB_ACCESS_REMOTE_ATOMIC; + + return ib_flags; +} + +static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr, + struct mlx4_qp_path *path) +{ + struct mlx4_dev *dev = ibdev->dev; + int is_eth; + + memset(ib_ah_attr, 0, sizeof *ib_ah_attr); + ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1; + + if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) + return; + + is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) == + IB_LINK_LAYER_ETHERNET; + if (is_eth) + ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) | + ((path->sched_queue & 4) << 1); + else + ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; + + ib_ah_attr->dlid = be16_to_cpu(path->rlid); + ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f; + ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; + ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0; + if (ib_ah_attr->ah_flags) { + ib_ah_attr->grh.sgid_index = path->mgid_index; + ib_ah_attr->grh.hop_limit = path->hop_limit; + ib_ah_attr->grh.traffic_class = + (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff; + ib_ah_attr->grh.flow_label = + be32_to_cpu(path->tclass_flowlabel) & 0xfffff; + memcpy(ib_ah_attr->grh.dgid.raw, + path->rgid, sizeof ib_ah_attr->grh.dgid.raw); + } +} + +int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct mlx4_ib_dev *dev = to_mdev(ibqp->device); + struct mlx4_ib_qp *qp = to_mqp(ibqp); + struct mlx4_qp_context context; + int mlx4_state; + int err = 0; + + mutex_lock(&qp->mutex); + + if (qp->state == IB_QPS_RESET) { + qp_attr->qp_state = IB_QPS_RESET; + goto done; + } + + err = mlx4_qp_query(dev->dev, &qp->mqp, &context); + if (err) { + err = -EINVAL; + goto out; + } + + mlx4_state = be32_to_cpu(context.flags) >> 28; + + qp->state = to_ib_qp_state(mlx4_state); + qp_attr->qp_state = qp->state; + qp_attr->path_mtu = context.mtu_msgmax >> 5; + qp_attr->path_mig_state = + to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3); + qp_attr->qkey = be32_to_cpu(context.qkey); + qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff; + qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff; + qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff; + qp_attr->qp_access_flags = + to_ib_qp_access_flags(be32_to_cpu(context.params2)); + + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); + to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); + qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; + qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; + } + + qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f; + if (qp_attr->qp_state == IB_QPS_INIT) + qp_attr->port_num = qp->port; + else + qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; + + /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ + qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING; + + qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7); + + qp_attr->max_dest_rd_atomic = + 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7); + qp_attr->min_rnr_timer = + (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f; + qp_attr->timeout = context.pri_path.ackto >> 3; + qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7; + qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7; + qp_attr->alt_timeout = context.alt_path.ackto >> 3; + +done: + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; + qp_attr->cap.max_recv_sge = qp->rq.max_gs; + + if (!ibqp->uobject) { + qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; + qp_attr->cap.max_send_sge = qp->sq.max_gs; + } else { + qp_attr->cap.max_send_wr = 0; + qp_attr->cap.max_send_sge = 0; + } + + /* + * We don't support inline sends for kernel QPs (yet), and we + * don't know what userspace's value should be. + */ + qp_attr->cap.max_inline_data = 0; + + qp_init_attr->cap = qp_attr->cap; + + qp_init_attr->create_flags = 0; + if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) + qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; + + if (qp->flags & MLX4_IB_QP_LSO) + qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; + +out: + mutex_unlock(&qp->mutex); + return err; +} + diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c new file mode 100644 index 00000000..39542f37 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "mlx4_ib.h" +#include "user.h" + +static void *get_wqe(struct mlx4_ib_srq *srq, int n) +{ + return mlx4_buf_offset(&srq->buf, n << srq->msrq.wqe_shift); +} + +static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type) +{ + struct ib_event event; + struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq; + + if (ibsrq->event_handler) { + event.device = ibsrq->device; + event.element.srq = ibsrq; + switch (type) { + case MLX4_EVENT_TYPE_SRQ_LIMIT: + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + break; + case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR: + event.event = IB_EVENT_SRQ_ERR; + break; + default: + printk(KERN_WARNING "mlx4_ib: Unexpected event type %d " + "on SRQ %06x\n", type, srq->srqn); + return; + } + + ibsrq->event_handler(&event, ibsrq->srq_context); + } +} + +struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_srq *srq; + struct mlx4_wqe_srq_next_seg *next; + struct mlx4_wqe_data_seg *scatter; + u32 cqn; + u16 xrcdn; + int desc_size; + int buf_size; + int err; + int i; + + /* Sanity check SRQ size before proceeding */ + if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes || + init_attr->attr.max_sge > dev->dev->caps.max_srq_sge) + return ERR_PTR(-EINVAL); + + srq = kmalloc(sizeof *srq, GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + mutex_init(&srq->mutex); + spin_lock_init(&srq->lock); + srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1); + srq->msrq.max_gs = init_attr->attr.max_sge; + + desc_size = max(32UL, + roundup_pow_of_two(sizeof (struct mlx4_wqe_srq_next_seg) + + srq->msrq.max_gs * + sizeof (struct mlx4_wqe_data_seg))); + srq->msrq.wqe_shift = ilog2(desc_size); + + buf_size = srq->msrq.max * desc_size; + + if (pd->uobject) { + struct mlx4_ib_create_srq ucmd; + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { + err = -EFAULT; + goto err_srq; + } + + srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, + buf_size, 0, 0); + if (IS_ERR(srq->umem)) { + err = PTR_ERR(srq->umem); + goto err_srq; + } + + err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem), + ilog2(srq->umem->page_size), &srq->mtt); + if (err) + goto err_buf; + + err = mlx4_ib_umem_write_mtt(dev, &srq->mtt, srq->umem); + if (err) + goto err_mtt; + + err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), + ucmd.db_addr, &srq->db); + if (err) + goto err_mtt; + } else { + err = mlx4_db_alloc(dev->dev, &srq->db, 0); + if (err) + goto err_srq; + + *srq->db.db = 0; + + if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) { + err = -ENOMEM; + goto err_db; + } + + srq->head = 0; + srq->tail = srq->msrq.max - 1; + srq->wqe_ctr = 0; + + for (i = 0; i < srq->msrq.max; ++i) { + next = get_wqe(srq, i); + next->next_wqe_index = + cpu_to_be16((i + 1) & (srq->msrq.max - 1)); + + for (scatter = (void *) (next + 1); + (void *) scatter < (void *) next + desc_size; + ++scatter) + scatter->lkey = cpu_to_be32(MLX4_INVALID_LKEY); + } + + err = mlx4_mtt_init(dev->dev, srq->buf.npages, srq->buf.page_shift, + &srq->mtt); + if (err) + goto err_buf; + + err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf); + if (err) + goto err_mtt; + + srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL); + if (!srq->wrid) { + err = -ENOMEM; + goto err_mtt; + } + } + + cqn = (init_attr->srq_type == IB_SRQT_XRC) ? + to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0; + xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ? + to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn : + (u16) dev->dev->caps.reserved_xrcds; + err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt, + srq->db.dma, &srq->msrq); + if (err) + goto err_wrid; + + srq->msrq.event = mlx4_ib_srq_event; + srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; + + if (pd->uobject) + if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { + err = -EFAULT; + goto err_wrid; + } + + init_attr->attr.max_wr = srq->msrq.max - 1; + + return &srq->ibsrq; + +err_wrid: + if (pd->uobject) + mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); + else + kfree(srq->wrid); + +err_mtt: + mlx4_mtt_cleanup(dev->dev, &srq->mtt); + +err_buf: + if (pd->uobject) + ib_umem_release(srq->umem); + else + mlx4_buf_free(dev->dev, buf_size, &srq->buf); + +err_db: + if (!pd->uobject) + mlx4_db_free(dev->dev, &srq->db); + +err_srq: + kfree(srq); + + return ERR_PTR(err); +} + +int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(ibsrq->device); + struct mlx4_ib_srq *srq = to_msrq(ibsrq); + int ret; + + /* We don't support resizing SRQs (yet?) */ + if (attr_mask & IB_SRQ_MAX_WR) + return -EINVAL; + + if (attr_mask & IB_SRQ_LIMIT) { + if (attr->srq_limit >= srq->msrq.max) + return -EINVAL; + + mutex_lock(&srq->mutex); + ret = mlx4_srq_arm(dev->dev, &srq->msrq, attr->srq_limit); + mutex_unlock(&srq->mutex); + + if (ret) + return ret; + } + + return 0; +} + +int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct mlx4_ib_dev *dev = to_mdev(ibsrq->device); + struct mlx4_ib_srq *srq = to_msrq(ibsrq); + int ret; + int limit_watermark; + + ret = mlx4_srq_query(dev->dev, &srq->msrq, &limit_watermark); + if (ret) + return ret; + + srq_attr->srq_limit = limit_watermark; + srq_attr->max_wr = srq->msrq.max - 1; + srq_attr->max_sge = srq->msrq.max_gs; + + return 0; +} + +int mlx4_ib_destroy_srq(struct ib_srq *srq) +{ + struct mlx4_ib_dev *dev = to_mdev(srq->device); + struct mlx4_ib_srq *msrq = to_msrq(srq); + + mlx4_srq_free(dev->dev, &msrq->msrq); + mlx4_mtt_cleanup(dev->dev, &msrq->mtt); + + if (srq->uobject) { + mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); + ib_umem_release(msrq->umem); + } else { + kfree(msrq->wrid); + mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift, + &msrq->buf); + mlx4_db_free(dev->dev, &msrq->db); + } + + kfree(msrq); + + return 0; +} + +void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index) +{ + struct mlx4_wqe_srq_next_seg *next; + + /* always called with interrupts disabled. */ + spin_lock(&srq->lock); + + next = get_wqe(srq, srq->tail); + next->next_wqe_index = cpu_to_be16(wqe_index); + srq->tail = wqe_index; + + spin_unlock(&srq->lock); +} + +int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mlx4_ib_srq *srq = to_msrq(ibsrq); + struct mlx4_wqe_srq_next_seg *next; + struct mlx4_wqe_data_seg *scat; + unsigned long flags; + int err = 0; + int nreq; + int i; + + spin_lock_irqsave(&srq->lock, flags); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(wr->num_sge > srq->msrq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + break; + } + + if (unlikely(srq->head == srq->tail)) { + err = -ENOMEM; + *bad_wr = wr; + break; + } + + srq->wrid[srq->head] = wr->wr_id; + + next = get_wqe(srq, srq->head); + srq->head = be16_to_cpu(next->next_wqe_index); + scat = (struct mlx4_wqe_data_seg *) (next + 1); + + for (i = 0; i < wr->num_sge; ++i) { + scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length); + scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey); + scat[i].addr = cpu_to_be64(wr->sg_list[i].addr); + } + + if (i < srq->msrq.max_gs) { + scat[i].byte_count = 0; + scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); + scat[i].addr = 0; + } + } + + if (likely(nreq)) { + srq->wqe_ctr += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + *srq->db.db = cpu_to_be32(srq->wqe_ctr); + } + + spin_unlock_irqrestore(&srq->lock, flags); + + return err; +} diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h new file mode 100644 index 00000000..13beedee --- /dev/null +++ b/drivers/infiniband/hw/mlx4/user.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_IB_USER_H +#define MLX4_IB_USER_H + +#include + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define MLX4_IB_UVERBS_ABI_VERSION 3 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct mlx4_ib_alloc_ucontext_resp { + __u32 qp_tab_size; + __u16 bf_reg_size; + __u16 bf_regs_per_page; +}; + +struct mlx4_ib_alloc_pd_resp { + __u32 pdn; + __u32 reserved; +}; + +struct mlx4_ib_create_cq { + __u64 buf_addr; + __u64 db_addr; +}; + +struct mlx4_ib_create_cq_resp { + __u32 cqn; + __u32 reserved; +}; + +struct mlx4_ib_resize_cq { + __u64 buf_addr; +}; + +struct mlx4_ib_create_srq { + __u64 buf_addr; + __u64 db_addr; +}; + +struct mlx4_ib_create_srq_resp { + __u32 srqn; + __u32 reserved; +}; + +struct mlx4_ib_create_qp { + __u64 buf_addr; + __u64 db_addr; + __u8 log_sq_bb_count; + __u8 log_sq_stride; + __u8 sq_no_prefetch; + __u8 reserved[5]; +}; + +#endif /* MLX4_IB_USER_H */ diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig new file mode 100644 index 00000000..da314c3f --- /dev/null +++ b/drivers/infiniband/hw/mthca/Kconfig @@ -0,0 +1,17 @@ +config INFINIBAND_MTHCA + tristate "Mellanox HCA support" + depends on PCI + ---help--- + This is a low-level driver for Mellanox InfiniHost host + channel adapters (HCAs), including the MT23108 PCI-X HCA + ("Tavor") and the MT25208 PCI Express HCA ("Arbel"). + +config INFINIBAND_MTHCA_DEBUG + bool "Verbose debugging output" if EXPERT + depends on INFINIBAND_MTHCA + default y + ---help--- + This option causes debugging code to be compiled into the + mthca driver. The output can be turned on via the + debug_level module parameter (which can also be set after + the driver is loaded through sysfs). diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile new file mode 100644 index 00000000..e388d95d --- /dev/null +++ b/drivers/infiniband/hw/mthca/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o + +ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \ + mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \ + mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \ + mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o \ + mthca_catas.o diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c new file mode 100644 index 00000000..b4e0cf4e --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_allocator.c @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "mthca_dev.h" + +/* Trivial bitmap-based allocator */ +u32 mthca_alloc(struct mthca_alloc *alloc) +{ + unsigned long flags; + u32 obj; + + spin_lock_irqsave(&alloc->lock, flags); + + obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last); + if (obj >= alloc->max) { + alloc->top = (alloc->top + alloc->max) & alloc->mask; + obj = find_first_zero_bit(alloc->table, alloc->max); + } + + if (obj < alloc->max) { + set_bit(obj, alloc->table); + obj |= alloc->top; + } else + obj = -1; + + spin_unlock_irqrestore(&alloc->lock, flags); + + return obj; +} + +void mthca_free(struct mthca_alloc *alloc, u32 obj) +{ + unsigned long flags; + + obj &= alloc->max - 1; + + spin_lock_irqsave(&alloc->lock, flags); + + clear_bit(obj, alloc->table); + alloc->last = min(alloc->last, obj); + alloc->top = (alloc->top + alloc->max) & alloc->mask; + + spin_unlock_irqrestore(&alloc->lock, flags); +} + +int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask, + u32 reserved) +{ + int i; + + /* num must be a power of 2 */ + if (num != 1 << (ffs(num) - 1)) + return -EINVAL; + + alloc->last = 0; + alloc->top = 0; + alloc->max = num; + alloc->mask = mask; + spin_lock_init(&alloc->lock); + alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof (long), + GFP_KERNEL); + if (!alloc->table) + return -ENOMEM; + + bitmap_zero(alloc->table, num); + for (i = 0; i < reserved; ++i) + set_bit(i, alloc->table); + + return 0; +} + +void mthca_alloc_cleanup(struct mthca_alloc *alloc) +{ + kfree(alloc->table); +} + +/* + * Array of pointers with lazy allocation of leaf pages. Callers of + * _get, _set and _clear methods must use a lock or otherwise + * serialize access to the array. + */ + +#define MTHCA_ARRAY_MASK (PAGE_SIZE / sizeof (void *) - 1) + +void *mthca_array_get(struct mthca_array *array, int index) +{ + int p = (index * sizeof (void *)) >> PAGE_SHIFT; + + if (array->page_list[p].page) + return array->page_list[p].page[index & MTHCA_ARRAY_MASK]; + else + return NULL; +} + +int mthca_array_set(struct mthca_array *array, int index, void *value) +{ + int p = (index * sizeof (void *)) >> PAGE_SHIFT; + + /* Allocate with GFP_ATOMIC because we'll be called with locks held. */ + if (!array->page_list[p].page) + array->page_list[p].page = (void **) get_zeroed_page(GFP_ATOMIC); + + if (!array->page_list[p].page) + return -ENOMEM; + + array->page_list[p].page[index & MTHCA_ARRAY_MASK] = value; + ++array->page_list[p].used; + + return 0; +} + +void mthca_array_clear(struct mthca_array *array, int index) +{ + int p = (index * sizeof (void *)) >> PAGE_SHIFT; + + if (--array->page_list[p].used == 0) { + free_page((unsigned long) array->page_list[p].page); + array->page_list[p].page = NULL; + } else + array->page_list[p].page[index & MTHCA_ARRAY_MASK] = NULL; + + if (array->page_list[p].used < 0) + pr_debug("Array %p index %d page %d with ref count %d < 0\n", + array, index, p, array->page_list[p].used); +} + +int mthca_array_init(struct mthca_array *array, int nent) +{ + int npage = (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE; + int i; + + array->page_list = kmalloc(npage * sizeof *array->page_list, GFP_KERNEL); + if (!array->page_list) + return -ENOMEM; + + for (i = 0; i < npage; ++i) { + array->page_list[i].page = NULL; + array->page_list[i].used = 0; + } + + return 0; +} + +void mthca_array_cleanup(struct mthca_array *array, int nent) +{ + int i; + + for (i = 0; i < (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE; ++i) + free_page((unsigned long) array->page_list[i].page); + + kfree(array->page_list); +} + +/* + * Handling for queue buffers -- we allocate a bunch of memory and + * register it in a memory region at HCA virtual address 0. If the + * requested size is > max_direct, we split the allocation into + * multiple pages, so we don't require too much contiguous memory. + */ + +int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, + union mthca_buf *buf, int *is_direct, struct mthca_pd *pd, + int hca_write, struct mthca_mr *mr) +{ + int err = -ENOMEM; + int npages, shift; + u64 *dma_list = NULL; + dma_addr_t t; + int i; + + if (size <= max_direct) { + *is_direct = 1; + npages = 1; + shift = get_order(size) + PAGE_SHIFT; + + buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, + size, &t, GFP_KERNEL); + if (!buf->direct.buf) + return -ENOMEM; + + dma_unmap_addr_set(&buf->direct, mapping, t); + + memset(buf->direct.buf, 0, size); + + while (t & ((1 << shift) - 1)) { + --shift; + npages *= 2; + } + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + goto err_free; + + for (i = 0; i < npages; ++i) + dma_list[i] = t + i * (1 << shift); + } else { + *is_direct = 0; + npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + shift = PAGE_SHIFT; + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + return -ENOMEM; + + buf->page_list = kmalloc(npages * sizeof *buf->page_list, + GFP_KERNEL); + if (!buf->page_list) + goto err_out; + + for (i = 0; i < npages; ++i) + buf->page_list[i].buf = NULL; + + for (i = 0; i < npages; ++i) { + buf->page_list[i].buf = + dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + &t, GFP_KERNEL); + if (!buf->page_list[i].buf) + goto err_free; + + dma_list[i] = t; + dma_unmap_addr_set(&buf->page_list[i], mapping, t); + + clear_page(buf->page_list[i].buf); + } + } + + err = mthca_mr_alloc_phys(dev, pd->pd_num, + dma_list, shift, npages, + 0, size, + MTHCA_MPT_FLAG_LOCAL_READ | + (hca_write ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0), + mr); + if (err) + goto err_free; + + kfree(dma_list); + + return 0; + +err_free: + mthca_buf_free(dev, size, buf, *is_direct, NULL); + +err_out: + kfree(dma_list); + + return err; +} + +void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf, + int is_direct, struct mthca_mr *mr) +{ + int i; + + if (mr) + mthca_free_mr(dev, mr); + + if (is_direct) + dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf, + dma_unmap_addr(&buf->direct, mapping)); + else { + for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + buf->page_list[i].buf, + dma_unmap_addr(&buf->page_list[i], + mapping)); + kfree(buf->page_list); + } +} diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c new file mode 100644 index 00000000..32f6c631 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -0,0 +1,374 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include +#include + +#include "mthca_dev.h" + +enum { + MTHCA_RATE_TAVOR_FULL = 0, + MTHCA_RATE_TAVOR_1X = 1, + MTHCA_RATE_TAVOR_4X = 2, + MTHCA_RATE_TAVOR_1X_DDR = 3 +}; + +enum { + MTHCA_RATE_MEMFREE_FULL = 0, + MTHCA_RATE_MEMFREE_QUARTER = 1, + MTHCA_RATE_MEMFREE_EIGHTH = 2, + MTHCA_RATE_MEMFREE_HALF = 3 +}; + +struct mthca_av { + __be32 port_pd; + u8 reserved1; + u8 g_slid; + __be16 dlid; + u8 reserved2; + u8 gid_index; + u8 msg_sr; + u8 hop_limit; + __be32 sl_tclass_flowlabel; + __be32 dgid[4]; +}; + +static enum ib_rate memfree_rate_to_ib(u8 mthca_rate, u8 port_rate) +{ + switch (mthca_rate) { + case MTHCA_RATE_MEMFREE_EIGHTH: + return mult_to_ib_rate(port_rate >> 3); + case MTHCA_RATE_MEMFREE_QUARTER: + return mult_to_ib_rate(port_rate >> 2); + case MTHCA_RATE_MEMFREE_HALF: + return mult_to_ib_rate(port_rate >> 1); + case MTHCA_RATE_MEMFREE_FULL: + default: + return mult_to_ib_rate(port_rate); + } +} + +static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate) +{ + switch (mthca_rate) { + case MTHCA_RATE_TAVOR_1X: return IB_RATE_2_5_GBPS; + case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS; + case MTHCA_RATE_TAVOR_4X: return IB_RATE_10_GBPS; + default: return mult_to_ib_rate(port_rate); + } +} + +enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port) +{ + if (mthca_is_memfree(dev)) { + /* Handle old Arbel FW */ + if (dev->limits.stat_rate_support == 0x3 && mthca_rate) + return IB_RATE_2_5_GBPS; + + return memfree_rate_to_ib(mthca_rate, dev->rate[port - 1]); + } else + return tavor_rate_to_ib(mthca_rate, dev->rate[port - 1]); +} + +static u8 ib_rate_to_memfree(u8 req_rate, u8 cur_rate) +{ + if (cur_rate <= req_rate) + return 0; + + /* + * Inter-packet delay (IPD) to get from rate X down to a rate + * no more than Y is (X - 1) / Y. + */ + switch ((cur_rate - 1) / req_rate) { + case 0: return MTHCA_RATE_MEMFREE_FULL; + case 1: return MTHCA_RATE_MEMFREE_HALF; + case 2: /* fall through */ + case 3: return MTHCA_RATE_MEMFREE_QUARTER; + default: return MTHCA_RATE_MEMFREE_EIGHTH; + } +} + +static u8 ib_rate_to_tavor(u8 static_rate) +{ + switch (static_rate) { + case IB_RATE_2_5_GBPS: return MTHCA_RATE_TAVOR_1X; + case IB_RATE_5_GBPS: return MTHCA_RATE_TAVOR_1X_DDR; + case IB_RATE_10_GBPS: return MTHCA_RATE_TAVOR_4X; + default: return MTHCA_RATE_TAVOR_FULL; + } +} + +u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port) +{ + u8 rate; + + if (!static_rate || ib_rate_to_mult(static_rate) >= dev->rate[port - 1]) + return 0; + + if (mthca_is_memfree(dev)) + rate = ib_rate_to_memfree(ib_rate_to_mult(static_rate), + dev->rate[port - 1]); + else + rate = ib_rate_to_tavor(static_rate); + + if (!(dev->limits.stat_rate_support & (1 << rate))) + rate = 1; + + return rate; +} + +int mthca_create_ah(struct mthca_dev *dev, + struct mthca_pd *pd, + struct ib_ah_attr *ah_attr, + struct mthca_ah *ah) +{ + u32 index = -1; + struct mthca_av *av = NULL; + + ah->type = MTHCA_AH_PCI_POOL; + + if (mthca_is_memfree(dev)) { + ah->av = kmalloc(sizeof *ah->av, GFP_ATOMIC); + if (!ah->av) + return -ENOMEM; + + ah->type = MTHCA_AH_KMALLOC; + av = ah->av; + } else if (!atomic_read(&pd->sqp_count) && + !(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) { + index = mthca_alloc(&dev->av_table.alloc); + + /* fall back to allocate in host memory */ + if (index == -1) + goto on_hca_fail; + + av = kmalloc(sizeof *av, GFP_ATOMIC); + if (!av) + goto on_hca_fail; + + ah->type = MTHCA_AH_ON_HCA; + ah->avdma = dev->av_table.ddr_av_base + + index * MTHCA_AV_SIZE; + } + +on_hca_fail: + if (ah->type == MTHCA_AH_PCI_POOL) { + ah->av = pci_pool_alloc(dev->av_table.pool, + GFP_ATOMIC, &ah->avdma); + if (!ah->av) + return -ENOMEM; + + av = ah->av; + } + + ah->key = pd->ntmr.ibmr.lkey; + + memset(av, 0, MTHCA_AV_SIZE); + + av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24)); + av->g_slid = ah_attr->src_path_bits; + av->dlid = cpu_to_be16(ah_attr->dlid); + av->msg_sr = (3 << 4) | /* 2K message */ + mthca_get_rate(dev, ah_attr->static_rate, ah_attr->port_num); + av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + if (ah_attr->ah_flags & IB_AH_GRH) { + av->g_slid |= 0x80; + av->gid_index = (ah_attr->port_num - 1) * dev->limits.gid_table_len + + ah_attr->grh.sgid_index; + av->hop_limit = ah_attr->grh.hop_limit; + av->sl_tclass_flowlabel |= + cpu_to_be32((ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label); + memcpy(av->dgid, ah_attr->grh.dgid.raw, 16); + } else { + /* Arbel workaround -- low byte of GID must be 2 */ + av->dgid[3] = cpu_to_be32(2); + } + + if (0) { + int j; + + mthca_dbg(dev, "Created UDAV at %p/%08lx:\n", + av, (unsigned long) ah->avdma); + for (j = 0; j < 8; ++j) + printk(KERN_DEBUG " [%2x] %08x\n", + j * 4, be32_to_cpu(((__be32 *) av)[j])); + } + + if (ah->type == MTHCA_AH_ON_HCA) { + memcpy_toio(dev->av_table.av_map + index * MTHCA_AV_SIZE, + av, MTHCA_AV_SIZE); + kfree(av); + } + + return 0; +} + +int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah) +{ + switch (ah->type) { + case MTHCA_AH_ON_HCA: + mthca_free(&dev->av_table.alloc, + (ah->avdma - dev->av_table.ddr_av_base) / + MTHCA_AV_SIZE); + break; + + case MTHCA_AH_PCI_POOL: + pci_pool_free(dev->av_table.pool, ah->av, ah->avdma); + break; + + case MTHCA_AH_KMALLOC: + kfree(ah->av); + break; + } + + return 0; +} + +int mthca_ah_grh_present(struct mthca_ah *ah) +{ + return !!(ah->av->g_slid & 0x80); +} + +int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, + struct ib_ud_header *header) +{ + if (ah->type == MTHCA_AH_ON_HCA) + return -EINVAL; + + header->lrh.service_level = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28; + header->lrh.destination_lid = ah->av->dlid; + header->lrh.source_lid = cpu_to_be16(ah->av->g_slid & 0x7f); + if (mthca_ah_grh_present(ah)) { + header->grh.traffic_class = + (be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff; + header->grh.flow_label = + ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff); + header->grh.hop_limit = ah->av->hop_limit; + ib_get_cached_gid(&dev->ib_dev, + be32_to_cpu(ah->av->port_pd) >> 24, + ah->av->gid_index % dev->limits.gid_table_len, + &header->grh.source_gid); + memcpy(header->grh.destination_gid.raw, + ah->av->dgid, 16); + } + + return 0; +} + +int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr) +{ + struct mthca_ah *ah = to_mah(ibah); + struct mthca_dev *dev = to_mdev(ibah->device); + + /* Only implement for MAD and memfree ah for now. */ + if (ah->type == MTHCA_AH_ON_HCA) + return -ENOSYS; + + memset(attr, 0, sizeof *attr); + attr->dlid = be16_to_cpu(ah->av->dlid); + attr->sl = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28; + attr->port_num = be32_to_cpu(ah->av->port_pd) >> 24; + attr->static_rate = mthca_rate_to_ib(dev, ah->av->msg_sr & 0x7, + attr->port_num); + attr->src_path_bits = ah->av->g_slid & 0x7F; + attr->ah_flags = mthca_ah_grh_present(ah) ? IB_AH_GRH : 0; + + if (attr->ah_flags) { + attr->grh.traffic_class = + be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20; + attr->grh.flow_label = + be32_to_cpu(ah->av->sl_tclass_flowlabel) & 0xfffff; + attr->grh.hop_limit = ah->av->hop_limit; + attr->grh.sgid_index = ah->av->gid_index & + (dev->limits.gid_table_len - 1); + memcpy(attr->grh.dgid.raw, ah->av->dgid, 16); + } + + return 0; +} + +int mthca_init_av_table(struct mthca_dev *dev) +{ + int err; + + if (mthca_is_memfree(dev)) + return 0; + + err = mthca_alloc_init(&dev->av_table.alloc, + dev->av_table.num_ddr_avs, + dev->av_table.num_ddr_avs - 1, + 0); + if (err) + return err; + + dev->av_table.pool = pci_pool_create("mthca_av", dev->pdev, + MTHCA_AV_SIZE, + MTHCA_AV_SIZE, 0); + if (!dev->av_table.pool) + goto out_free_alloc; + + if (!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) { + dev->av_table.av_map = ioremap(pci_resource_start(dev->pdev, 4) + + dev->av_table.ddr_av_base - + dev->ddr_start, + dev->av_table.num_ddr_avs * + MTHCA_AV_SIZE); + if (!dev->av_table.av_map) + goto out_free_pool; + } else + dev->av_table.av_map = NULL; + + return 0; + + out_free_pool: + pci_pool_destroy(dev->av_table.pool); + + out_free_alloc: + mthca_alloc_cleanup(&dev->av_table.alloc); + return -ENOMEM; +} + +void mthca_cleanup_av_table(struct mthca_dev *dev) +{ + if (mthca_is_memfree(dev)) + return; + + if (dev->av_table.av_map) + iounmap(dev->av_table.av_map); + pci_pool_destroy(dev->av_table.pool); + mthca_alloc_cleanup(&dev->av_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c new file mode 100644 index 00000000..712d2a30 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include + +#include "mthca_dev.h" + +enum { + MTHCA_CATAS_POLL_INTERVAL = 5 * HZ, + + MTHCA_CATAS_TYPE_INTERNAL = 0, + MTHCA_CATAS_TYPE_UPLINK = 3, + MTHCA_CATAS_TYPE_DDR = 4, + MTHCA_CATAS_TYPE_PARITY = 5, +}; + +static DEFINE_SPINLOCK(catas_lock); + +static LIST_HEAD(catas_list); +static struct workqueue_struct *catas_wq; +static struct work_struct catas_work; + +static int catas_reset_disable; +module_param_named(catas_reset_disable, catas_reset_disable, int, 0644); +MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero"); + +static void catas_reset(struct work_struct *work) +{ + struct mthca_dev *dev, *tmpdev; + LIST_HEAD(tlist); + int ret; + + mutex_lock(&mthca_device_mutex); + + spin_lock_irq(&catas_lock); + list_splice_init(&catas_list, &tlist); + spin_unlock_irq(&catas_lock); + + list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) { + struct pci_dev *pdev = dev->pdev; + ret = __mthca_restart_one(dev->pdev); + /* 'dev' now is not valid */ + if (ret) + printk(KERN_ERR "mthca %s: Reset failed (%d)\n", + pci_name(pdev), ret); + else { + struct mthca_dev *d = pci_get_drvdata(pdev); + mthca_dbg(d, "Reset succeeded\n"); + } + } + + mutex_unlock(&mthca_device_mutex); +} + +static void handle_catas(struct mthca_dev *dev) +{ + struct ib_event event; + unsigned long flags; + const char *type; + int i; + + event.device = &dev->ib_dev; + event.event = IB_EVENT_DEVICE_FATAL; + event.element.port_num = 0; + dev->active = false; + + ib_dispatch_event(&event); + + switch (swab32(readl(dev->catas_err.map)) >> 24) { + case MTHCA_CATAS_TYPE_INTERNAL: + type = "internal error"; + break; + case MTHCA_CATAS_TYPE_UPLINK: + type = "uplink bus error"; + break; + case MTHCA_CATAS_TYPE_DDR: + type = "DDR data error"; + break; + case MTHCA_CATAS_TYPE_PARITY: + type = "internal parity error"; + break; + default: + type = "unknown error"; + break; + } + + mthca_err(dev, "Catastrophic error detected: %s\n", type); + for (i = 0; i < dev->catas_err.size; ++i) + mthca_err(dev, " buf[%02x]: %08x\n", + i, swab32(readl(dev->catas_err.map + i))); + + if (catas_reset_disable) + return; + + spin_lock_irqsave(&catas_lock, flags); + list_add(&dev->catas_err.list, &catas_list); + queue_work(catas_wq, &catas_work); + spin_unlock_irqrestore(&catas_lock, flags); +} + +static void poll_catas(unsigned long dev_ptr) +{ + struct mthca_dev *dev = (struct mthca_dev *) dev_ptr; + int i; + + for (i = 0; i < dev->catas_err.size; ++i) + if (readl(dev->catas_err.map + i)) { + handle_catas(dev); + return; + } + + mod_timer(&dev->catas_err.timer, + round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL)); +} + +void mthca_start_catas_poll(struct mthca_dev *dev) +{ + phys_addr_t addr; + + init_timer(&dev->catas_err.timer); + dev->catas_err.map = NULL; + + addr = pci_resource_start(dev->pdev, 0) + + ((pci_resource_len(dev->pdev, 0) - 1) & + dev->catas_err.addr); + + dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4); + if (!dev->catas_err.map) { + mthca_warn(dev, "couldn't map catastrophic error region " + "at 0x%llx/0x%x\n", (unsigned long long) addr, + dev->catas_err.size * 4); + return; + } + + dev->catas_err.timer.data = (unsigned long) dev; + dev->catas_err.timer.function = poll_catas; + dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL; + INIT_LIST_HEAD(&dev->catas_err.list); + add_timer(&dev->catas_err.timer); +} + +void mthca_stop_catas_poll(struct mthca_dev *dev) +{ + del_timer_sync(&dev->catas_err.timer); + + if (dev->catas_err.map) + iounmap(dev->catas_err.map); + + spin_lock_irq(&catas_lock); + list_del(&dev->catas_err.list); + spin_unlock_irq(&catas_lock); +} + +int __init mthca_catas_init(void) +{ + INIT_WORK(&catas_work, catas_reset); + + catas_wq = create_singlethread_workqueue("mthca_catas"); + if (!catas_wq) + return -ENOMEM; + + return 0; +} + +void mthca_catas_cleanup(void) +{ + destroy_workqueue(catas_wq); +} diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c new file mode 100644 index 00000000..9d3e5c1a --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -0,0 +1,1969 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mthca_dev.h" +#include "mthca_config_reg.h" +#include "mthca_cmd.h" +#include "mthca_memfree.h" + +#define CMD_POLL_TOKEN 0xffff + +enum { + HCR_IN_PARAM_OFFSET = 0x00, + HCR_IN_MODIFIER_OFFSET = 0x08, + HCR_OUT_PARAM_OFFSET = 0x0c, + HCR_TOKEN_OFFSET = 0x14, + HCR_STATUS_OFFSET = 0x18, + + HCR_OPMOD_SHIFT = 12, + HCA_E_BIT = 22, + HCR_GO_BIT = 23 +}; + +enum { + /* initialization and general commands */ + CMD_SYS_EN = 0x1, + CMD_SYS_DIS = 0x2, + CMD_MAP_FA = 0xfff, + CMD_UNMAP_FA = 0xffe, + CMD_RUN_FW = 0xff6, + CMD_MOD_STAT_CFG = 0x34, + CMD_QUERY_DEV_LIM = 0x3, + CMD_QUERY_FW = 0x4, + CMD_ENABLE_LAM = 0xff8, + CMD_DISABLE_LAM = 0xff7, + CMD_QUERY_DDR = 0x5, + CMD_QUERY_ADAPTER = 0x6, + CMD_INIT_HCA = 0x7, + CMD_CLOSE_HCA = 0x8, + CMD_INIT_IB = 0x9, + CMD_CLOSE_IB = 0xa, + CMD_QUERY_HCA = 0xb, + CMD_SET_IB = 0xc, + CMD_ACCESS_DDR = 0x2e, + CMD_MAP_ICM = 0xffa, + CMD_UNMAP_ICM = 0xff9, + CMD_MAP_ICM_AUX = 0xffc, + CMD_UNMAP_ICM_AUX = 0xffb, + CMD_SET_ICM_SIZE = 0xffd, + + /* TPT commands */ + CMD_SW2HW_MPT = 0xd, + CMD_QUERY_MPT = 0xe, + CMD_HW2SW_MPT = 0xf, + CMD_READ_MTT = 0x10, + CMD_WRITE_MTT = 0x11, + CMD_SYNC_TPT = 0x2f, + + /* EQ commands */ + CMD_MAP_EQ = 0x12, + CMD_SW2HW_EQ = 0x13, + CMD_HW2SW_EQ = 0x14, + CMD_QUERY_EQ = 0x15, + + /* CQ commands */ + CMD_SW2HW_CQ = 0x16, + CMD_HW2SW_CQ = 0x17, + CMD_QUERY_CQ = 0x18, + CMD_RESIZE_CQ = 0x2c, + + /* SRQ commands */ + CMD_SW2HW_SRQ = 0x35, + CMD_HW2SW_SRQ = 0x36, + CMD_QUERY_SRQ = 0x37, + CMD_ARM_SRQ = 0x40, + + /* QP/EE commands */ + CMD_RST2INIT_QPEE = 0x19, + CMD_INIT2RTR_QPEE = 0x1a, + CMD_RTR2RTS_QPEE = 0x1b, + CMD_RTS2RTS_QPEE = 0x1c, + CMD_SQERR2RTS_QPEE = 0x1d, + CMD_2ERR_QPEE = 0x1e, + CMD_RTS2SQD_QPEE = 0x1f, + CMD_SQD2SQD_QPEE = 0x38, + CMD_SQD2RTS_QPEE = 0x20, + CMD_ERR2RST_QPEE = 0x21, + CMD_QUERY_QPEE = 0x22, + CMD_INIT2INIT_QPEE = 0x2d, + CMD_SUSPEND_QPEE = 0x32, + CMD_UNSUSPEND_QPEE = 0x33, + /* special QPs and management commands */ + CMD_CONF_SPECIAL_QP = 0x23, + CMD_MAD_IFC = 0x24, + + /* multicast commands */ + CMD_READ_MGM = 0x25, + CMD_WRITE_MGM = 0x26, + CMD_MGID_HASH = 0x27, + + /* miscellaneous commands */ + CMD_DIAG_RPRT = 0x30, + CMD_NOP = 0x31, + + /* debug commands */ + CMD_QUERY_DEBUG_MSG = 0x2a, + CMD_SET_DEBUG_MSG = 0x2b, +}; + +/* + * According to Mellanox code, FW may be starved and never complete + * commands. So we can't use strict timeouts described in PRM -- we + * just arbitrarily select 60 seconds for now. + */ +#if 0 +/* + * Round up and add 1 to make sure we get the full wait time (since we + * will be starting in the middle of a jiffy) + */ +enum { + CMD_TIME_CLASS_A = (HZ + 999) / 1000 + 1, + CMD_TIME_CLASS_B = (HZ + 99) / 100 + 1, + CMD_TIME_CLASS_C = (HZ + 9) / 10 + 1, + CMD_TIME_CLASS_D = 60 * HZ +}; +#else +enum { + CMD_TIME_CLASS_A = 60 * HZ, + CMD_TIME_CLASS_B = 60 * HZ, + CMD_TIME_CLASS_C = 60 * HZ, + CMD_TIME_CLASS_D = 60 * HZ +}; +#endif + +enum { + GO_BIT_TIMEOUT = HZ * 10 +}; + +struct mthca_cmd_context { + struct completion done; + int result; + int next; + u64 out_param; + u16 token; + u8 status; +}; + +static int fw_cmd_doorbell = 0; +module_param(fw_cmd_doorbell, int, 0644); +MODULE_PARM_DESC(fw_cmd_doorbell, "post FW commands through doorbell page if nonzero " + "(and supported by FW)"); + +static inline int go_bit(struct mthca_dev *dev) +{ + return readl(dev->hcr + HCR_STATUS_OFFSET) & + swab32(1 << HCR_GO_BIT); +} + +static void mthca_cmd_post_dbell(struct mthca_dev *dev, + u64 in_param, + u64 out_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + u16 token) +{ + void __iomem *ptr = dev->cmd.dbell_map; + u16 *offs = dev->cmd.dbell_offsets; + + __raw_writel((__force u32) cpu_to_be32(in_param >> 32), ptr + offs[0]); + wmb(); + __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), ptr + offs[1]); + wmb(); + __raw_writel((__force u32) cpu_to_be32(in_modifier), ptr + offs[2]); + wmb(); + __raw_writel((__force u32) cpu_to_be32(out_param >> 32), ptr + offs[3]); + wmb(); + __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), ptr + offs[4]); + wmb(); + __raw_writel((__force u32) cpu_to_be32(token << 16), ptr + offs[5]); + wmb(); + __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) | + (1 << HCA_E_BIT) | + (op_modifier << HCR_OPMOD_SHIFT) | + op), ptr + offs[6]); + wmb(); + __raw_writel((__force u32) 0, ptr + offs[7]); + wmb(); +} + +static int mthca_cmd_post_hcr(struct mthca_dev *dev, + u64 in_param, + u64 out_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + u16 token, + int event) +{ + if (event) { + unsigned long end = jiffies + GO_BIT_TIMEOUT; + + while (go_bit(dev) && time_before(jiffies, end)) { + set_current_state(TASK_RUNNING); + schedule(); + } + } + + if (go_bit(dev)) + return -EAGAIN; + + /* + * We use writel (instead of something like memcpy_toio) + * because writes of less than 32 bits to the HCR don't work + * (and some architectures such as ia64 implement memcpy_toio + * in terms of writeb). + */ + __raw_writel((__force u32) cpu_to_be32(in_param >> 32), dev->hcr + 0 * 4); + __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), dev->hcr + 1 * 4); + __raw_writel((__force u32) cpu_to_be32(in_modifier), dev->hcr + 2 * 4); + __raw_writel((__force u32) cpu_to_be32(out_param >> 32), dev->hcr + 3 * 4); + __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), dev->hcr + 4 * 4); + __raw_writel((__force u32) cpu_to_be32(token << 16), dev->hcr + 5 * 4); + + /* __raw_writel may not order writes. */ + wmb(); + + __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) | + (event ? (1 << HCA_E_BIT) : 0) | + (op_modifier << HCR_OPMOD_SHIFT) | + op), dev->hcr + 6 * 4); + + return 0; +} + +static int mthca_cmd_post(struct mthca_dev *dev, + u64 in_param, + u64 out_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + u16 token, + int event) +{ + int err = 0; + + mutex_lock(&dev->cmd.hcr_mutex); + + if (event && dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS && fw_cmd_doorbell) + mthca_cmd_post_dbell(dev, in_param, out_param, in_modifier, + op_modifier, op, token); + else + err = mthca_cmd_post_hcr(dev, in_param, out_param, in_modifier, + op_modifier, op, token, event); + + /* + * Make sure that our HCR writes don't get mixed in with + * writes from another CPU starting a FW command. + */ + mmiowb(); + + mutex_unlock(&dev->cmd.hcr_mutex); + return err; +} + + +static int mthca_status_to_errno(u8 status) +{ + static const int trans_table[] = { + [MTHCA_CMD_STAT_INTERNAL_ERR] = -EIO, + [MTHCA_CMD_STAT_BAD_OP] = -EPERM, + [MTHCA_CMD_STAT_BAD_PARAM] = -EINVAL, + [MTHCA_CMD_STAT_BAD_SYS_STATE] = -ENXIO, + [MTHCA_CMD_STAT_BAD_RESOURCE] = -EBADF, + [MTHCA_CMD_STAT_RESOURCE_BUSY] = -EBUSY, + [MTHCA_CMD_STAT_DDR_MEM_ERR] = -ENOMEM, + [MTHCA_CMD_STAT_EXCEED_LIM] = -ENOMEM, + [MTHCA_CMD_STAT_BAD_RES_STATE] = -EBADF, + [MTHCA_CMD_STAT_BAD_INDEX] = -EBADF, + [MTHCA_CMD_STAT_BAD_NVMEM] = -EFAULT, + [MTHCA_CMD_STAT_BAD_QPEE_STATE] = -EINVAL, + [MTHCA_CMD_STAT_BAD_SEG_PARAM] = -EFAULT, + [MTHCA_CMD_STAT_REG_BOUND] = -EBUSY, + [MTHCA_CMD_STAT_LAM_NOT_PRE] = -EAGAIN, + [MTHCA_CMD_STAT_BAD_PKT] = -EBADMSG, + [MTHCA_CMD_STAT_BAD_SIZE] = -ENOMEM, + }; + + if (status >= ARRAY_SIZE(trans_table) || + (status != MTHCA_CMD_STAT_OK + && trans_table[status] == 0)) + return -EINVAL; + + return trans_table[status]; +} + + +static int mthca_cmd_poll(struct mthca_dev *dev, + u64 in_param, + u64 *out_param, + int out_is_imm, + u32 in_modifier, + u8 op_modifier, + u16 op, + unsigned long timeout) +{ + int err = 0; + unsigned long end; + u8 status; + + down(&dev->cmd.poll_sem); + + err = mthca_cmd_post(dev, in_param, + out_param ? *out_param : 0, + in_modifier, op_modifier, + op, CMD_POLL_TOKEN, 0); + if (err) + goto out; + + end = timeout + jiffies; + while (go_bit(dev) && time_before(jiffies, end)) { + set_current_state(TASK_RUNNING); + schedule(); + } + + if (go_bit(dev)) { + err = -EBUSY; + goto out; + } + + if (out_is_imm) + *out_param = + (u64) be32_to_cpu((__force __be32) + __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 | + (u64) be32_to_cpu((__force __be32) + __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET + 4)); + + status = be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24; + if (status) { + mthca_dbg(dev, "Command %02x completed with status %02x\n", + op, status); + err = mthca_status_to_errno(status); + } + +out: + up(&dev->cmd.poll_sem); + return err; +} + +void mthca_cmd_event(struct mthca_dev *dev, + u16 token, + u8 status, + u64 out_param) +{ + struct mthca_cmd_context *context = + &dev->cmd.context[token & dev->cmd.token_mask]; + + /* previously timed out command completing at long last */ + if (token != context->token) + return; + + context->result = 0; + context->status = status; + context->out_param = out_param; + + complete(&context->done); +} + +static int mthca_cmd_wait(struct mthca_dev *dev, + u64 in_param, + u64 *out_param, + int out_is_imm, + u32 in_modifier, + u8 op_modifier, + u16 op, + unsigned long timeout) +{ + int err = 0; + struct mthca_cmd_context *context; + + down(&dev->cmd.event_sem); + + spin_lock(&dev->cmd.context_lock); + BUG_ON(dev->cmd.free_head < 0); + context = &dev->cmd.context[dev->cmd.free_head]; + context->token += dev->cmd.token_mask + 1; + dev->cmd.free_head = context->next; + spin_unlock(&dev->cmd.context_lock); + + init_completion(&context->done); + + err = mthca_cmd_post(dev, in_param, + out_param ? *out_param : 0, + in_modifier, op_modifier, + op, context->token, 1); + if (err) + goto out; + + if (!wait_for_completion_timeout(&context->done, timeout)) { + err = -EBUSY; + goto out; + } + + err = context->result; + if (err) + goto out; + + if (context->status) { + mthca_dbg(dev, "Command %02x completed with status %02x\n", + op, context->status); + err = mthca_status_to_errno(context->status); + } + + if (out_is_imm) + *out_param = context->out_param; + +out: + spin_lock(&dev->cmd.context_lock); + context->next = dev->cmd.free_head; + dev->cmd.free_head = context - dev->cmd.context; + spin_unlock(&dev->cmd.context_lock); + + up(&dev->cmd.event_sem); + return err; +} + +/* Invoke a command with an output mailbox */ +static int mthca_cmd_box(struct mthca_dev *dev, + u64 in_param, + u64 out_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + unsigned long timeout) +{ + if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS) + return mthca_cmd_wait(dev, in_param, &out_param, 0, + in_modifier, op_modifier, op, + timeout); + else + return mthca_cmd_poll(dev, in_param, &out_param, 0, + in_modifier, op_modifier, op, + timeout); +} + +/* Invoke a command with no output parameter */ +static int mthca_cmd(struct mthca_dev *dev, + u64 in_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + unsigned long timeout) +{ + return mthca_cmd_box(dev, in_param, 0, in_modifier, + op_modifier, op, timeout); +} + +/* + * Invoke a command with an immediate output parameter (and copy the + * output into the caller's out_param pointer after the command + * executes). + */ +static int mthca_cmd_imm(struct mthca_dev *dev, + u64 in_param, + u64 *out_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + unsigned long timeout) +{ + if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS) + return mthca_cmd_wait(dev, in_param, out_param, 1, + in_modifier, op_modifier, op, + timeout); + else + return mthca_cmd_poll(dev, in_param, out_param, 1, + in_modifier, op_modifier, op, + timeout); +} + +int mthca_cmd_init(struct mthca_dev *dev) +{ + mutex_init(&dev->cmd.hcr_mutex); + sema_init(&dev->cmd.poll_sem, 1); + dev->cmd.flags = 0; + + dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE, + MTHCA_HCR_SIZE); + if (!dev->hcr) { + mthca_err(dev, "Couldn't map command register."); + return -ENOMEM; + } + + dev->cmd.pool = pci_pool_create("mthca_cmd", dev->pdev, + MTHCA_MAILBOX_SIZE, + MTHCA_MAILBOX_SIZE, 0); + if (!dev->cmd.pool) { + iounmap(dev->hcr); + return -ENOMEM; + } + + return 0; +} + +void mthca_cmd_cleanup(struct mthca_dev *dev) +{ + pci_pool_destroy(dev->cmd.pool); + iounmap(dev->hcr); + if (dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS) + iounmap(dev->cmd.dbell_map); +} + +/* + * Switch to using events to issue FW commands (should be called after + * event queue to command events has been initialized). + */ +int mthca_cmd_use_events(struct mthca_dev *dev) +{ + int i; + + dev->cmd.context = kmalloc(dev->cmd.max_cmds * + sizeof (struct mthca_cmd_context), + GFP_KERNEL); + if (!dev->cmd.context) + return -ENOMEM; + + for (i = 0; i < dev->cmd.max_cmds; ++i) { + dev->cmd.context[i].token = i; + dev->cmd.context[i].next = i + 1; + } + + dev->cmd.context[dev->cmd.max_cmds - 1].next = -1; + dev->cmd.free_head = 0; + + sema_init(&dev->cmd.event_sem, dev->cmd.max_cmds); + spin_lock_init(&dev->cmd.context_lock); + + for (dev->cmd.token_mask = 1; + dev->cmd.token_mask < dev->cmd.max_cmds; + dev->cmd.token_mask <<= 1) + ; /* nothing */ + --dev->cmd.token_mask; + + dev->cmd.flags |= MTHCA_CMD_USE_EVENTS; + + down(&dev->cmd.poll_sem); + + return 0; +} + +/* + * Switch back to polling (used when shutting down the device) + */ +void mthca_cmd_use_polling(struct mthca_dev *dev) +{ + int i; + + dev->cmd.flags &= ~MTHCA_CMD_USE_EVENTS; + + for (i = 0; i < dev->cmd.max_cmds; ++i) + down(&dev->cmd.event_sem); + + kfree(dev->cmd.context); + + up(&dev->cmd.poll_sem); +} + +struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, + gfp_t gfp_mask) +{ + struct mthca_mailbox *mailbox; + + mailbox = kmalloc(sizeof *mailbox, gfp_mask); + if (!mailbox) + return ERR_PTR(-ENOMEM); + + mailbox->buf = pci_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma); + if (!mailbox->buf) { + kfree(mailbox); + return ERR_PTR(-ENOMEM); + } + + return mailbox; +} + +void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox) +{ + if (!mailbox) + return; + + pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); + kfree(mailbox); +} + +int mthca_SYS_EN(struct mthca_dev *dev) +{ + u64 out; + int ret; + + ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, CMD_TIME_CLASS_D); + + if (ret == -ENOMEM) + mthca_warn(dev, "SYS_EN DDR error: syn=%x, sock=%d, " + "sladdr=%d, SPD source=%s\n", + (int) (out >> 6) & 0xf, (int) (out >> 4) & 3, + (int) (out >> 1) & 7, (int) out & 1 ? "NVMEM" : "DIMM"); + + return ret; +} + +int mthca_SYS_DIS(struct mthca_dev *dev) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, CMD_TIME_CLASS_C); +} + +static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, + u64 virt) +{ + struct mthca_mailbox *mailbox; + struct mthca_icm_iter iter; + __be64 *pages; + int lg; + int nent = 0; + int i; + int err = 0; + int ts = 0, tc = 0; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + memset(mailbox->buf, 0, MTHCA_MAILBOX_SIZE); + pages = mailbox->buf; + + for (mthca_icm_first(icm, &iter); + !mthca_icm_last(&iter); + mthca_icm_next(&iter)) { + /* + * We have to pass pages that are aligned to their + * size, so find the least significant 1 in the + * address or size and use that as our log2 size. + */ + lg = ffs(mthca_icm_addr(&iter) | mthca_icm_size(&iter)) - 1; + if (lg < MTHCA_ICM_PAGE_SHIFT) { + mthca_warn(dev, "Got FW area not aligned to %d (%llx/%lx).\n", + MTHCA_ICM_PAGE_SIZE, + (unsigned long long) mthca_icm_addr(&iter), + mthca_icm_size(&iter)); + err = -EINVAL; + goto out; + } + for (i = 0; i < mthca_icm_size(&iter) >> lg; ++i) { + if (virt != -1) { + pages[nent * 2] = cpu_to_be64(virt); + virt += 1 << lg; + } + + pages[nent * 2 + 1] = + cpu_to_be64((mthca_icm_addr(&iter) + (i << lg)) | + (lg - MTHCA_ICM_PAGE_SHIFT)); + ts += 1 << (lg - 10); + ++tc; + + if (++nent == MTHCA_MAILBOX_SIZE / 16) { + err = mthca_cmd(dev, mailbox->dma, nent, 0, op, + CMD_TIME_CLASS_B); + if (err) + goto out; + nent = 0; + } + } + } + + if (nent) + err = mthca_cmd(dev, mailbox->dma, nent, 0, op, + CMD_TIME_CLASS_B); + + switch (op) { + case CMD_MAP_FA: + mthca_dbg(dev, "Mapped %d chunks/%d KB for FW.\n", tc, ts); + break; + case CMD_MAP_ICM_AUX: + mthca_dbg(dev, "Mapped %d chunks/%d KB for ICM aux.\n", tc, ts); + break; + case CMD_MAP_ICM: + mthca_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM.\n", + tc, ts, (unsigned long long) virt - (ts << 10)); + break; + } + +out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm) +{ + return mthca_map_cmd(dev, CMD_MAP_FA, icm, -1); +} + +int mthca_UNMAP_FA(struct mthca_dev *dev) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_FA, CMD_TIME_CLASS_B); +} + +int mthca_RUN_FW(struct mthca_dev *dev) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A); +} + +static void mthca_setup_cmd_doorbells(struct mthca_dev *dev, u64 base) +{ + phys_addr_t addr; + u16 max_off = 0; + int i; + + for (i = 0; i < 8; ++i) + max_off = max(max_off, dev->cmd.dbell_offsets[i]); + + if ((base & PAGE_MASK) != ((base + max_off) & PAGE_MASK)) { + mthca_warn(dev, "Firmware doorbell region at 0x%016llx, " + "length 0x%x crosses a page boundary\n", + (unsigned long long) base, max_off); + return; + } + + addr = pci_resource_start(dev->pdev, 2) + + ((pci_resource_len(dev->pdev, 2) - 1) & base); + dev->cmd.dbell_map = ioremap(addr, max_off + sizeof(u32)); + if (!dev->cmd.dbell_map) + return; + + dev->cmd.flags |= MTHCA_CMD_POST_DOORBELLS; + mthca_dbg(dev, "Mapped doorbell page for posting FW commands\n"); +} + +int mthca_QUERY_FW(struct mthca_dev *dev) +{ + struct mthca_mailbox *mailbox; + u32 *outbox; + u64 base; + u32 tmp; + int err = 0; + u8 lg; + int i; + +#define QUERY_FW_OUT_SIZE 0x100 +#define QUERY_FW_VER_OFFSET 0x00 +#define QUERY_FW_MAX_CMD_OFFSET 0x0f +#define QUERY_FW_ERR_START_OFFSET 0x30 +#define QUERY_FW_ERR_SIZE_OFFSET 0x38 + +#define QUERY_FW_CMD_DB_EN_OFFSET 0x10 +#define QUERY_FW_CMD_DB_OFFSET 0x50 +#define QUERY_FW_CMD_DB_BASE 0x60 + +#define QUERY_FW_START_OFFSET 0x20 +#define QUERY_FW_END_OFFSET 0x28 + +#define QUERY_FW_SIZE_OFFSET 0x00 +#define QUERY_FW_CLR_INT_BASE_OFFSET 0x20 +#define QUERY_FW_EQ_ARM_BASE_OFFSET 0x40 +#define QUERY_FW_EQ_SET_CI_BASE_OFFSET 0x48 + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_FW, + CMD_TIME_CLASS_A); + + if (err) + goto out; + + MTHCA_GET(dev->fw_ver, outbox, QUERY_FW_VER_OFFSET); + /* + * FW subminor version is at more significant bits than minor + * version, so swap here. + */ + dev->fw_ver = (dev->fw_ver & 0xffff00000000ull) | + ((dev->fw_ver & 0xffff0000ull) >> 16) | + ((dev->fw_ver & 0x0000ffffull) << 16); + + MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); + dev->cmd.max_cmds = 1 << lg; + + mthca_dbg(dev, "FW version %012llx, max commands %d\n", + (unsigned long long) dev->fw_ver, dev->cmd.max_cmds); + + MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET); + MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET); + + mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n", + (unsigned long long) dev->catas_err.addr, dev->catas_err.size); + + MTHCA_GET(tmp, outbox, QUERY_FW_CMD_DB_EN_OFFSET); + if (tmp & 0x1) { + mthca_dbg(dev, "FW supports commands through doorbells\n"); + + MTHCA_GET(base, outbox, QUERY_FW_CMD_DB_BASE); + for (i = 0; i < MTHCA_CMD_NUM_DBELL_DWORDS; ++i) + MTHCA_GET(dev->cmd.dbell_offsets[i], outbox, + QUERY_FW_CMD_DB_OFFSET + (i << 1)); + + mthca_setup_cmd_doorbells(dev, base); + } + + if (mthca_is_memfree(dev)) { + MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET); + MTHCA_GET(dev->fw.arbel.clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET); + MTHCA_GET(dev->fw.arbel.eq_arm_base, outbox, QUERY_FW_EQ_ARM_BASE_OFFSET); + MTHCA_GET(dev->fw.arbel.eq_set_ci_base, outbox, QUERY_FW_EQ_SET_CI_BASE_OFFSET); + mthca_dbg(dev, "FW size %d KB\n", dev->fw.arbel.fw_pages << 2); + + /* + * Round up number of system pages needed in case + * MTHCA_ICM_PAGE_SIZE < PAGE_SIZE. + */ + dev->fw.arbel.fw_pages = + ALIGN(dev->fw.arbel.fw_pages, PAGE_SIZE / MTHCA_ICM_PAGE_SIZE) >> + (PAGE_SHIFT - MTHCA_ICM_PAGE_SHIFT); + + mthca_dbg(dev, "Clear int @ %llx, EQ arm @ %llx, EQ set CI @ %llx\n", + (unsigned long long) dev->fw.arbel.clr_int_base, + (unsigned long long) dev->fw.arbel.eq_arm_base, + (unsigned long long) dev->fw.arbel.eq_set_ci_base); + } else { + MTHCA_GET(dev->fw.tavor.fw_start, outbox, QUERY_FW_START_OFFSET); + MTHCA_GET(dev->fw.tavor.fw_end, outbox, QUERY_FW_END_OFFSET); + + mthca_dbg(dev, "FW size %d KB (start %llx, end %llx)\n", + (int) ((dev->fw.tavor.fw_end - dev->fw.tavor.fw_start) >> 10), + (unsigned long long) dev->fw.tavor.fw_start, + (unsigned long long) dev->fw.tavor.fw_end); + } + +out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_ENABLE_LAM(struct mthca_dev *dev) +{ + struct mthca_mailbox *mailbox; + u8 info; + u32 *outbox; + int err = 0; + +#define ENABLE_LAM_OUT_SIZE 0x100 +#define ENABLE_LAM_START_OFFSET 0x00 +#define ENABLE_LAM_END_OFFSET 0x08 +#define ENABLE_LAM_INFO_OFFSET 0x13 + +#define ENABLE_LAM_INFO_HIDDEN_FLAG (1 << 4) +#define ENABLE_LAM_INFO_ECC_MASK 0x3 + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_ENABLE_LAM, + CMD_TIME_CLASS_C); + + if (err) + goto out; + + MTHCA_GET(dev->ddr_start, outbox, ENABLE_LAM_START_OFFSET); + MTHCA_GET(dev->ddr_end, outbox, ENABLE_LAM_END_OFFSET); + MTHCA_GET(info, outbox, ENABLE_LAM_INFO_OFFSET); + + if (!!(info & ENABLE_LAM_INFO_HIDDEN_FLAG) != + !!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) { + mthca_info(dev, "FW reports that HCA-attached memory " + "is %s hidden; does not match PCI config\n", + (info & ENABLE_LAM_INFO_HIDDEN_FLAG) ? + "" : "not"); + } + if (info & ENABLE_LAM_INFO_HIDDEN_FLAG) + mthca_dbg(dev, "HCA-attached memory is hidden.\n"); + + mthca_dbg(dev, "HCA memory size %d KB (start %llx, end %llx)\n", + (int) ((dev->ddr_end - dev->ddr_start) >> 10), + (unsigned long long) dev->ddr_start, + (unsigned long long) dev->ddr_end); + +out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_DISABLE_LAM(struct mthca_dev *dev) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, CMD_TIME_CLASS_C); +} + +int mthca_QUERY_DDR(struct mthca_dev *dev) +{ + struct mthca_mailbox *mailbox; + u8 info; + u32 *outbox; + int err = 0; + +#define QUERY_DDR_OUT_SIZE 0x100 +#define QUERY_DDR_START_OFFSET 0x00 +#define QUERY_DDR_END_OFFSET 0x08 +#define QUERY_DDR_INFO_OFFSET 0x13 + +#define QUERY_DDR_INFO_HIDDEN_FLAG (1 << 4) +#define QUERY_DDR_INFO_ECC_MASK 0x3 + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DDR, + CMD_TIME_CLASS_A); + + if (err) + goto out; + + MTHCA_GET(dev->ddr_start, outbox, QUERY_DDR_START_OFFSET); + MTHCA_GET(dev->ddr_end, outbox, QUERY_DDR_END_OFFSET); + MTHCA_GET(info, outbox, QUERY_DDR_INFO_OFFSET); + + if (!!(info & QUERY_DDR_INFO_HIDDEN_FLAG) != + !!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) { + mthca_info(dev, "FW reports that HCA-attached memory " + "is %s hidden; does not match PCI config\n", + (info & QUERY_DDR_INFO_HIDDEN_FLAG) ? + "" : "not"); + } + if (info & QUERY_DDR_INFO_HIDDEN_FLAG) + mthca_dbg(dev, "HCA-attached memory is hidden.\n"); + + mthca_dbg(dev, "HCA memory size %d KB (start %llx, end %llx)\n", + (int) ((dev->ddr_end - dev->ddr_start) >> 10), + (unsigned long long) dev->ddr_start, + (unsigned long long) dev->ddr_end); + +out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, + struct mthca_dev_lim *dev_lim) +{ + struct mthca_mailbox *mailbox; + u32 *outbox; + u8 field; + u16 size; + u16 stat_rate; + int err; + +#define QUERY_DEV_LIM_OUT_SIZE 0x100 +#define QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET 0x10 +#define QUERY_DEV_LIM_MAX_QP_SZ_OFFSET 0x11 +#define QUERY_DEV_LIM_RSVD_QP_OFFSET 0x12 +#define QUERY_DEV_LIM_MAX_QP_OFFSET 0x13 +#define QUERY_DEV_LIM_RSVD_SRQ_OFFSET 0x14 +#define QUERY_DEV_LIM_MAX_SRQ_OFFSET 0x15 +#define QUERY_DEV_LIM_RSVD_EEC_OFFSET 0x16 +#define QUERY_DEV_LIM_MAX_EEC_OFFSET 0x17 +#define QUERY_DEV_LIM_MAX_CQ_SZ_OFFSET 0x19 +#define QUERY_DEV_LIM_RSVD_CQ_OFFSET 0x1a +#define QUERY_DEV_LIM_MAX_CQ_OFFSET 0x1b +#define QUERY_DEV_LIM_MAX_MPT_OFFSET 0x1d +#define QUERY_DEV_LIM_RSVD_EQ_OFFSET 0x1e +#define QUERY_DEV_LIM_MAX_EQ_OFFSET 0x1f +#define QUERY_DEV_LIM_RSVD_MTT_OFFSET 0x20 +#define QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET 0x21 +#define QUERY_DEV_LIM_RSVD_MRW_OFFSET 0x22 +#define QUERY_DEV_LIM_MAX_MTT_SEG_OFFSET 0x23 +#define QUERY_DEV_LIM_MAX_AV_OFFSET 0x27 +#define QUERY_DEV_LIM_MAX_REQ_QP_OFFSET 0x29 +#define QUERY_DEV_LIM_MAX_RES_QP_OFFSET 0x2b +#define QUERY_DEV_LIM_MAX_RDMA_OFFSET 0x2f +#define QUERY_DEV_LIM_RSZ_SRQ_OFFSET 0x33 +#define QUERY_DEV_LIM_ACK_DELAY_OFFSET 0x35 +#define QUERY_DEV_LIM_MTU_WIDTH_OFFSET 0x36 +#define QUERY_DEV_LIM_VL_PORT_OFFSET 0x37 +#define QUERY_DEV_LIM_MAX_GID_OFFSET 0x3b +#define QUERY_DEV_LIM_RATE_SUPPORT_OFFSET 0x3c +#define QUERY_DEV_LIM_MAX_PKEY_OFFSET 0x3f +#define QUERY_DEV_LIM_FLAGS_OFFSET 0x44 +#define QUERY_DEV_LIM_RSVD_UAR_OFFSET 0x48 +#define QUERY_DEV_LIM_UAR_SZ_OFFSET 0x49 +#define QUERY_DEV_LIM_PAGE_SZ_OFFSET 0x4b +#define QUERY_DEV_LIM_MAX_SG_OFFSET 0x51 +#define QUERY_DEV_LIM_MAX_DESC_SZ_OFFSET 0x52 +#define QUERY_DEV_LIM_MAX_SG_RQ_OFFSET 0x55 +#define QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET 0x56 +#define QUERY_DEV_LIM_MAX_QP_MCG_OFFSET 0x61 +#define QUERY_DEV_LIM_RSVD_MCG_OFFSET 0x62 +#define QUERY_DEV_LIM_MAX_MCG_OFFSET 0x63 +#define QUERY_DEV_LIM_RSVD_PD_OFFSET 0x64 +#define QUERY_DEV_LIM_MAX_PD_OFFSET 0x65 +#define QUERY_DEV_LIM_RSVD_RDD_OFFSET 0x66 +#define QUERY_DEV_LIM_MAX_RDD_OFFSET 0x67 +#define QUERY_DEV_LIM_EEC_ENTRY_SZ_OFFSET 0x80 +#define QUERY_DEV_LIM_QPC_ENTRY_SZ_OFFSET 0x82 +#define QUERY_DEV_LIM_EEEC_ENTRY_SZ_OFFSET 0x84 +#define QUERY_DEV_LIM_EQPC_ENTRY_SZ_OFFSET 0x86 +#define QUERY_DEV_LIM_EQC_ENTRY_SZ_OFFSET 0x88 +#define QUERY_DEV_LIM_CQC_ENTRY_SZ_OFFSET 0x8a +#define QUERY_DEV_LIM_SRQ_ENTRY_SZ_OFFSET 0x8c +#define QUERY_DEV_LIM_UAR_ENTRY_SZ_OFFSET 0x8e +#define QUERY_DEV_LIM_MTT_ENTRY_SZ_OFFSET 0x90 +#define QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET 0x92 +#define QUERY_DEV_LIM_PBL_SZ_OFFSET 0x96 +#define QUERY_DEV_LIM_BMME_FLAGS_OFFSET 0x97 +#define QUERY_DEV_LIM_RSVD_LKEY_OFFSET 0x98 +#define QUERY_DEV_LIM_LAMR_OFFSET 0x9f +#define QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET 0xa0 + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DEV_LIM, + CMD_TIME_CLASS_A); + + if (err) + goto out; + + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET); + dev_lim->reserved_qps = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET); + dev_lim->max_qps = 1 << (field & 0x1f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_SRQ_OFFSET); + dev_lim->reserved_srqs = 1 << (field >> 4); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_OFFSET); + dev_lim->max_srqs = 1 << (field & 0x1f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_EEC_OFFSET); + dev_lim->reserved_eecs = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EEC_OFFSET); + dev_lim->max_eecs = 1 << (field & 0x1f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_CQ_SZ_OFFSET); + dev_lim->max_cq_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_CQ_OFFSET); + dev_lim->reserved_cqs = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_CQ_OFFSET); + dev_lim->max_cqs = 1 << (field & 0x1f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MPT_OFFSET); + dev_lim->max_mpts = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_EQ_OFFSET); + dev_lim->reserved_eqs = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET); + dev_lim->max_eqs = 1 << (field & 0x7); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET); + if (mthca_is_memfree(dev)) + dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64), + dev->limits.mtt_seg_size) / dev->limits.mtt_seg_size; + else + dev_lim->reserved_mtts = 1 << (field >> 4); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET); + dev_lim->max_mrw_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET); + dev_lim->reserved_mrws = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MTT_SEG_OFFSET); + dev_lim->max_mtt_seg = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_REQ_QP_OFFSET); + dev_lim->max_requester_per_qp = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RES_QP_OFFSET); + dev_lim->max_responder_per_qp = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RDMA_OFFSET); + dev_lim->max_rdma_global = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_ACK_DELAY_OFFSET); + dev_lim->local_ca_ack_delay = field & 0x1f; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MTU_WIDTH_OFFSET); + dev_lim->max_mtu = field >> 4; + dev_lim->max_port_width = field & 0xf; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_VL_PORT_OFFSET); + dev_lim->max_vl = field >> 4; + dev_lim->num_ports = field & 0xf; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_GID_OFFSET); + dev_lim->max_gids = 1 << (field & 0xf); + MTHCA_GET(stat_rate, outbox, QUERY_DEV_LIM_RATE_SUPPORT_OFFSET); + dev_lim->stat_rate_support = stat_rate; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PKEY_OFFSET); + dev_lim->max_pkeys = 1 << (field & 0xf); + MTHCA_GET(dev_lim->flags, outbox, QUERY_DEV_LIM_FLAGS_OFFSET); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_UAR_OFFSET); + dev_lim->reserved_uars = field >> 4; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_UAR_SZ_OFFSET); + dev_lim->uar_size = 1 << ((field & 0x3f) + 20); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_PAGE_SZ_OFFSET); + dev_lim->min_page_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_OFFSET); + dev_lim->max_sg = field; + + MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_OFFSET); + dev_lim->max_desc_sz = size; + + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_MCG_OFFSET); + dev_lim->max_qp_per_mcg = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MCG_OFFSET); + dev_lim->reserved_mgms = field & 0xf; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MCG_OFFSET); + dev_lim->max_mcgs = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_PD_OFFSET); + dev_lim->reserved_pds = field >> 4; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PD_OFFSET); + dev_lim->max_pds = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_RDD_OFFSET); + dev_lim->reserved_rdds = field >> 4; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RDD_OFFSET); + dev_lim->max_rdds = 1 << (field & 0x3f); + + MTHCA_GET(size, outbox, QUERY_DEV_LIM_EEC_ENTRY_SZ_OFFSET); + dev_lim->eec_entry_sz = size; + MTHCA_GET(size, outbox, QUERY_DEV_LIM_QPC_ENTRY_SZ_OFFSET); + dev_lim->qpc_entry_sz = size; + MTHCA_GET(size, outbox, QUERY_DEV_LIM_EEEC_ENTRY_SZ_OFFSET); + dev_lim->eeec_entry_sz = size; + MTHCA_GET(size, outbox, QUERY_DEV_LIM_EQPC_ENTRY_SZ_OFFSET); + dev_lim->eqpc_entry_sz = size; + MTHCA_GET(size, outbox, QUERY_DEV_LIM_EQC_ENTRY_SZ_OFFSET); + dev_lim->eqc_entry_sz = size; + MTHCA_GET(size, outbox, QUERY_DEV_LIM_CQC_ENTRY_SZ_OFFSET); + dev_lim->cqc_entry_sz = size; + MTHCA_GET(size, outbox, QUERY_DEV_LIM_SRQ_ENTRY_SZ_OFFSET); + dev_lim->srq_entry_sz = size; + MTHCA_GET(size, outbox, QUERY_DEV_LIM_UAR_ENTRY_SZ_OFFSET); + dev_lim->uar_scratch_entry_sz = size; + + if (mthca_is_memfree(dev)) { + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); + dev_lim->max_srq_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); + dev_lim->max_qp_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSZ_SRQ_OFFSET); + dev_lim->hca.arbel.resize_srq = field & 1; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET); + dev_lim->max_sg = min_t(int, field, dev_lim->max_sg); + MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET); + dev_lim->max_desc_sz = min_t(int, size, dev_lim->max_desc_sz); + MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET); + dev_lim->mpt_entry_sz = size; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET); + dev_lim->hca.arbel.max_pbl_sz = 1 << (field & 0x3f); + MTHCA_GET(dev_lim->hca.arbel.bmme_flags, outbox, + QUERY_DEV_LIM_BMME_FLAGS_OFFSET); + MTHCA_GET(dev_lim->hca.arbel.reserved_lkey, outbox, + QUERY_DEV_LIM_RSVD_LKEY_OFFSET); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_LAMR_OFFSET); + dev_lim->hca.arbel.lam_required = field & 1; + MTHCA_GET(dev_lim->hca.arbel.max_icm_sz, outbox, + QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET); + + if (dev_lim->hca.arbel.bmme_flags & 1) + mthca_dbg(dev, "Base MM extensions: yes " + "(flags %d, max PBL %d, rsvd L_Key %08x)\n", + dev_lim->hca.arbel.bmme_flags, + dev_lim->hca.arbel.max_pbl_sz, + dev_lim->hca.arbel.reserved_lkey); + else + mthca_dbg(dev, "Base MM extensions: no\n"); + + mthca_dbg(dev, "Max ICM size %lld MB\n", + (unsigned long long) dev_lim->hca.arbel.max_icm_sz >> 20); + } else { + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); + dev_lim->max_srq_sz = (1 << field) - 1; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); + dev_lim->max_qp_sz = (1 << field) - 1; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_AV_OFFSET); + dev_lim->hca.tavor.max_avs = 1 << (field & 0x3f); + dev_lim->mpt_entry_sz = MTHCA_MPT_ENTRY_SIZE; + } + + mthca_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n", + dev_lim->max_qps, dev_lim->reserved_qps, dev_lim->qpc_entry_sz); + mthca_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, entry size: %d\n", + dev_lim->max_srqs, dev_lim->reserved_srqs, dev_lim->srq_entry_sz); + mthca_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n", + dev_lim->max_cqs, dev_lim->reserved_cqs, dev_lim->cqc_entry_sz); + mthca_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n", + dev_lim->max_eqs, dev_lim->reserved_eqs, dev_lim->eqc_entry_sz); + mthca_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n", + dev_lim->reserved_mrws, dev_lim->reserved_mtts); + mthca_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n", + dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars); + mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n", + dev_lim->max_pds, dev_lim->reserved_mgms); + mthca_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", + dev_lim->max_cq_sz, dev_lim->max_qp_sz, dev_lim->max_srq_sz); + + mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags); + +out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +static void get_board_id(void *vsd, char *board_id) +{ + int i; + +#define VSD_OFFSET_SIG1 0x00 +#define VSD_OFFSET_SIG2 0xde +#define VSD_OFFSET_MLX_BOARD_ID 0xd0 +#define VSD_OFFSET_TS_BOARD_ID 0x20 + +#define VSD_SIGNATURE_TOPSPIN 0x5ad + + memset(board_id, 0, MTHCA_BOARD_ID_LEN); + + if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN && + be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) { + strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN); + } else { + /* + * The board ID is a string but the firmware byte + * swaps each 4-byte word before passing it back to + * us. Therefore we need to swab it before printing. + */ + for (i = 0; i < 4; ++i) + ((u32 *) board_id)[i] = + swab32(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4)); + } +} + +int mthca_QUERY_ADAPTER(struct mthca_dev *dev, + struct mthca_adapter *adapter) +{ + struct mthca_mailbox *mailbox; + u32 *outbox; + int err; + +#define QUERY_ADAPTER_OUT_SIZE 0x100 +#define QUERY_ADAPTER_VENDOR_ID_OFFSET 0x00 +#define QUERY_ADAPTER_DEVICE_ID_OFFSET 0x04 +#define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08 +#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10 +#define QUERY_ADAPTER_VSD_OFFSET 0x20 + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_ADAPTER, + CMD_TIME_CLASS_A); + + if (err) + goto out; + + if (!mthca_is_memfree(dev)) { + MTHCA_GET(adapter->vendor_id, outbox, + QUERY_ADAPTER_VENDOR_ID_OFFSET); + MTHCA_GET(adapter->device_id, outbox, + QUERY_ADAPTER_DEVICE_ID_OFFSET); + MTHCA_GET(adapter->revision_id, outbox, + QUERY_ADAPTER_REVISION_ID_OFFSET); + } + MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); + + get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4, + adapter->board_id); + +out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_INIT_HCA(struct mthca_dev *dev, + struct mthca_init_hca_param *param) +{ + struct mthca_mailbox *mailbox; + __be32 *inbox; + int err; + +#define INIT_HCA_IN_SIZE 0x200 +#define INIT_HCA_FLAGS1_OFFSET 0x00c +#define INIT_HCA_FLAGS2_OFFSET 0x014 +#define INIT_HCA_QPC_OFFSET 0x020 +#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) +#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17) +#define INIT_HCA_EEC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x20) +#define INIT_HCA_LOG_EEC_OFFSET (INIT_HCA_QPC_OFFSET + 0x27) +#define INIT_HCA_SRQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x28) +#define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f) +#define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30) +#define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37) +#define INIT_HCA_EQPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40) +#define INIT_HCA_EEEC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50) +#define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60) +#define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67) +#define INIT_HCA_RDB_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70) +#define INIT_HCA_UDAV_OFFSET 0x0b0 +#define INIT_HCA_UDAV_LKEY_OFFSET (INIT_HCA_UDAV_OFFSET + 0x0) +#define INIT_HCA_UDAV_PD_OFFSET (INIT_HCA_UDAV_OFFSET + 0x4) +#define INIT_HCA_MCAST_OFFSET 0x0c0 +#define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00) +#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12) +#define INIT_HCA_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16) +#define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b) +#define INIT_HCA_TPT_OFFSET 0x0f0 +#define INIT_HCA_MPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00) +#define INIT_HCA_MTT_SEG_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x09) +#define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b) +#define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10) +#define INIT_HCA_UAR_OFFSET 0x120 +#define INIT_HCA_UAR_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x00) +#define INIT_HCA_UARC_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x09) +#define INIT_HCA_LOG_UAR_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0a) +#define INIT_HCA_UAR_PAGE_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0b) +#define INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10) +#define INIT_HCA_UAR_CTX_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x18) + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + memset(inbox, 0, INIT_HCA_IN_SIZE); + + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + MTHCA_PUT(inbox, 0x1, INIT_HCA_FLAGS1_OFFSET); + +#if defined(__LITTLE_ENDIAN) + *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) &= ~cpu_to_be32(1 << 1); +#elif defined(__BIG_ENDIAN) + *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1 << 1); +#else +#error Host endianness not defined +#endif + /* Check port for UD address vector: */ + *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1); + + /* Enable IPoIB checksumming if we can: */ + if (dev->device_cap_flags & IB_DEVICE_UD_IP_CSUM) + *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(7 << 3); + + /* We leave wqe_quota, responder_exu, etc as 0 (default) */ + + /* QPC/EEC/CQC/EQC/RDB attributes */ + + MTHCA_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_num_qps, INIT_HCA_LOG_QP_OFFSET); + MTHCA_PUT(inbox, param->eec_base, INIT_HCA_EEC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_num_eecs, INIT_HCA_LOG_EEC_OFFSET); + MTHCA_PUT(inbox, param->srqc_base, INIT_HCA_SRQC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_num_srqs, INIT_HCA_LOG_SRQ_OFFSET); + MTHCA_PUT(inbox, param->cqc_base, INIT_HCA_CQC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_num_cqs, INIT_HCA_LOG_CQ_OFFSET); + MTHCA_PUT(inbox, param->eqpc_base, INIT_HCA_EQPC_BASE_OFFSET); + MTHCA_PUT(inbox, param->eeec_base, INIT_HCA_EEEC_BASE_OFFSET); + MTHCA_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET); + MTHCA_PUT(inbox, param->rdb_base, INIT_HCA_RDB_BASE_OFFSET); + + /* UD AV attributes */ + + /* multicast attributes */ + + MTHCA_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); + MTHCA_PUT(inbox, param->mc_hash_sz, INIT_HCA_MC_HASH_SZ_OFFSET); + MTHCA_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + + /* TPT attributes */ + + MTHCA_PUT(inbox, param->mpt_base, INIT_HCA_MPT_BASE_OFFSET); + if (!mthca_is_memfree(dev)) + MTHCA_PUT(inbox, param->mtt_seg_sz, INIT_HCA_MTT_SEG_SZ_OFFSET); + MTHCA_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET); + MTHCA_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET); + + /* UAR attributes */ + { + u8 uar_page_sz = PAGE_SHIFT - 12; + MTHCA_PUT(inbox, uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET); + } + + MTHCA_PUT(inbox, param->uar_scratch_base, INIT_HCA_UAR_SCATCH_BASE_OFFSET); + + if (mthca_is_memfree(dev)) { + MTHCA_PUT(inbox, param->log_uarc_sz, INIT_HCA_UARC_SZ_OFFSET); + MTHCA_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET); + MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET); + } + + err = mthca_cmd(dev, mailbox->dma, 0, 0, + CMD_INIT_HCA, CMD_TIME_CLASS_D); + + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_INIT_IB(struct mthca_dev *dev, + struct mthca_init_ib_param *param, + int port) +{ + struct mthca_mailbox *mailbox; + u32 *inbox; + int err; + u32 flags; + +#define INIT_IB_IN_SIZE 56 +#define INIT_IB_FLAGS_OFFSET 0x00 +#define INIT_IB_FLAG_SIG (1 << 18) +#define INIT_IB_FLAG_NG (1 << 17) +#define INIT_IB_FLAG_G0 (1 << 16) +#define INIT_IB_VL_SHIFT 4 +#define INIT_IB_PORT_WIDTH_SHIFT 8 +#define INIT_IB_MTU_SHIFT 12 +#define INIT_IB_MAX_GID_OFFSET 0x06 +#define INIT_IB_MAX_PKEY_OFFSET 0x0a +#define INIT_IB_GUID0_OFFSET 0x10 +#define INIT_IB_NODE_GUID_OFFSET 0x18 +#define INIT_IB_SI_GUID_OFFSET 0x20 + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + memset(inbox, 0, INIT_IB_IN_SIZE); + + flags = 0; + flags |= param->set_guid0 ? INIT_IB_FLAG_G0 : 0; + flags |= param->set_node_guid ? INIT_IB_FLAG_NG : 0; + flags |= param->set_si_guid ? INIT_IB_FLAG_SIG : 0; + flags |= param->vl_cap << INIT_IB_VL_SHIFT; + flags |= param->port_width << INIT_IB_PORT_WIDTH_SHIFT; + flags |= param->mtu_cap << INIT_IB_MTU_SHIFT; + MTHCA_PUT(inbox, flags, INIT_IB_FLAGS_OFFSET); + + MTHCA_PUT(inbox, param->gid_cap, INIT_IB_MAX_GID_OFFSET); + MTHCA_PUT(inbox, param->pkey_cap, INIT_IB_MAX_PKEY_OFFSET); + MTHCA_PUT(inbox, param->guid0, INIT_IB_GUID0_OFFSET); + MTHCA_PUT(inbox, param->node_guid, INIT_IB_NODE_GUID_OFFSET); + MTHCA_PUT(inbox, param->si_guid, INIT_IB_SI_GUID_OFFSET); + + err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_INIT_IB, + CMD_TIME_CLASS_A); + + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_CLOSE_IB(struct mthca_dev *dev, int port) +{ + return mthca_cmd(dev, 0, port, 0, CMD_CLOSE_IB, CMD_TIME_CLASS_A); +} + +int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic) +{ + return mthca_cmd(dev, 0, 0, panic, CMD_CLOSE_HCA, CMD_TIME_CLASS_C); +} + +int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, + int port) +{ + struct mthca_mailbox *mailbox; + u32 *inbox; + int err; + u32 flags = 0; + +#define SET_IB_IN_SIZE 0x40 +#define SET_IB_FLAGS_OFFSET 0x00 +#define SET_IB_FLAG_SIG (1 << 18) +#define SET_IB_FLAG_RQK (1 << 0) +#define SET_IB_CAP_MASK_OFFSET 0x04 +#define SET_IB_SI_GUID_OFFSET 0x08 + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + memset(inbox, 0, SET_IB_IN_SIZE); + + flags |= param->set_si_guid ? SET_IB_FLAG_SIG : 0; + flags |= param->reset_qkey_viol ? SET_IB_FLAG_RQK : 0; + MTHCA_PUT(inbox, flags, SET_IB_FLAGS_OFFSET); + + MTHCA_PUT(inbox, param->cap_mask, SET_IB_CAP_MASK_OFFSET); + MTHCA_PUT(inbox, param->si_guid, SET_IB_SI_GUID_OFFSET); + + err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_SET_IB, + CMD_TIME_CLASS_B); + + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt) +{ + return mthca_map_cmd(dev, CMD_MAP_ICM, icm, virt); +} + +int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt) +{ + struct mthca_mailbox *mailbox; + __be64 *inbox; + int err; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + inbox[0] = cpu_to_be64(virt); + inbox[1] = cpu_to_be64(dma_addr); + + err = mthca_cmd(dev, mailbox->dma, 1, 0, CMD_MAP_ICM, + CMD_TIME_CLASS_B); + + mthca_free_mailbox(dev, mailbox); + + if (!err) + mthca_dbg(dev, "Mapped page at %llx to %llx for ICM.\n", + (unsigned long long) dma_addr, (unsigned long long) virt); + + return err; +} + +int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count) +{ + mthca_dbg(dev, "Unmapping %d pages at %llx from ICM.\n", + page_count, (unsigned long long) virt); + + return mthca_cmd(dev, virt, page_count, 0, + CMD_UNMAP_ICM, CMD_TIME_CLASS_B); +} + +int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm) +{ + return mthca_map_cmd(dev, CMD_MAP_ICM_AUX, icm, -1); +} + +int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_ICM_AUX, CMD_TIME_CLASS_B); +} + +int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages) +{ + int ret = mthca_cmd_imm(dev, icm_size, aux_pages, 0, + 0, CMD_SET_ICM_SIZE, CMD_TIME_CLASS_A); + + if (ret) + return ret; + + /* + * Round up number of system pages needed in case + * MTHCA_ICM_PAGE_SIZE < PAGE_SIZE. + */ + *aux_pages = ALIGN(*aux_pages, PAGE_SIZE / MTHCA_ICM_PAGE_SIZE) >> + (PAGE_SHIFT - MTHCA_ICM_PAGE_SHIFT); + + return 0; +} + +int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int mpt_index) +{ + return mthca_cmd(dev, mailbox->dma, mpt_index, 0, CMD_SW2HW_MPT, + CMD_TIME_CLASS_B); +} + +int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int mpt_index) +{ + return mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index, + !mailbox, CMD_HW2SW_MPT, + CMD_TIME_CLASS_B); +} + +int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int num_mtt) +{ + return mthca_cmd(dev, mailbox->dma, num_mtt, 0, CMD_WRITE_MTT, + CMD_TIME_CLASS_B); +} + +int mthca_SYNC_TPT(struct mthca_dev *dev) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_SYNC_TPT, CMD_TIME_CLASS_B); +} + +int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap, + int eq_num) +{ + mthca_dbg(dev, "%s mask %016llx for eqn %d\n", + unmap ? "Clearing" : "Setting", + (unsigned long long) event_mask, eq_num); + return mthca_cmd(dev, event_mask, (unmap << 31) | eq_num, + 0, CMD_MAP_EQ, CMD_TIME_CLASS_B); +} + +int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int eq_num) +{ + return mthca_cmd(dev, mailbox->dma, eq_num, 0, CMD_SW2HW_EQ, + CMD_TIME_CLASS_A); +} + +int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int eq_num) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, eq_num, 0, + CMD_HW2SW_EQ, + CMD_TIME_CLASS_A); +} + +int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int cq_num) +{ + return mthca_cmd(dev, mailbox->dma, cq_num, 0, CMD_SW2HW_CQ, + CMD_TIME_CLASS_A); +} + +int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int cq_num) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, cq_num, 0, + CMD_HW2SW_CQ, + CMD_TIME_CLASS_A); +} + +int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size) +{ + struct mthca_mailbox *mailbox; + __be32 *inbox; + int err; + +#define RESIZE_CQ_IN_SIZE 0x40 +#define RESIZE_CQ_LOG_SIZE_OFFSET 0x0c +#define RESIZE_CQ_LKEY_OFFSET 0x1c + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + memset(inbox, 0, RESIZE_CQ_IN_SIZE); + /* + * Leave start address fields zeroed out -- mthca assumes that + * MRs for CQs always start at virtual address 0. + */ + MTHCA_PUT(inbox, log_size, RESIZE_CQ_LOG_SIZE_OFFSET); + MTHCA_PUT(inbox, lkey, RESIZE_CQ_LKEY_OFFSET); + + err = mthca_cmd(dev, mailbox->dma, cq_num, 1, CMD_RESIZE_CQ, + CMD_TIME_CLASS_B); + + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num) +{ + return mthca_cmd(dev, mailbox->dma, srq_num, 0, CMD_SW2HW_SRQ, + CMD_TIME_CLASS_A); +} + +int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, srq_num, 0, + CMD_HW2SW_SRQ, + CMD_TIME_CLASS_A); +} + +int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num, + struct mthca_mailbox *mailbox) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, num, 0, + CMD_QUERY_SRQ, CMD_TIME_CLASS_A); +} + +int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit) +{ + return mthca_cmd(dev, limit, srq_num, 0, CMD_ARM_SRQ, + CMD_TIME_CLASS_B); +} + +int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur, + enum ib_qp_state next, u32 num, int is_ee, + struct mthca_mailbox *mailbox, u32 optmask) +{ + static const u16 op[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = { + [IB_QPS_RESET] = CMD_ERR2RST_QPEE, + [IB_QPS_ERR] = CMD_2ERR_QPEE, + [IB_QPS_INIT] = CMD_RST2INIT_QPEE, + }, + [IB_QPS_INIT] = { + [IB_QPS_RESET] = CMD_ERR2RST_QPEE, + [IB_QPS_ERR] = CMD_2ERR_QPEE, + [IB_QPS_INIT] = CMD_INIT2INIT_QPEE, + [IB_QPS_RTR] = CMD_INIT2RTR_QPEE, + }, + [IB_QPS_RTR] = { + [IB_QPS_RESET] = CMD_ERR2RST_QPEE, + [IB_QPS_ERR] = CMD_2ERR_QPEE, + [IB_QPS_RTS] = CMD_RTR2RTS_QPEE, + }, + [IB_QPS_RTS] = { + [IB_QPS_RESET] = CMD_ERR2RST_QPEE, + [IB_QPS_ERR] = CMD_2ERR_QPEE, + [IB_QPS_RTS] = CMD_RTS2RTS_QPEE, + [IB_QPS_SQD] = CMD_RTS2SQD_QPEE, + }, + [IB_QPS_SQD] = { + [IB_QPS_RESET] = CMD_ERR2RST_QPEE, + [IB_QPS_ERR] = CMD_2ERR_QPEE, + [IB_QPS_RTS] = CMD_SQD2RTS_QPEE, + [IB_QPS_SQD] = CMD_SQD2SQD_QPEE, + }, + [IB_QPS_SQE] = { + [IB_QPS_RESET] = CMD_ERR2RST_QPEE, + [IB_QPS_ERR] = CMD_2ERR_QPEE, + [IB_QPS_RTS] = CMD_SQERR2RTS_QPEE, + }, + [IB_QPS_ERR] = { + [IB_QPS_RESET] = CMD_ERR2RST_QPEE, + [IB_QPS_ERR] = CMD_2ERR_QPEE, + } + }; + + u8 op_mod = 0; + int my_mailbox = 0; + int err; + + if (op[cur][next] == CMD_ERR2RST_QPEE) { + op_mod = 3; /* don't write outbox, any->reset */ + + /* For debugging */ + if (!mailbox) { + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (!IS_ERR(mailbox)) { + my_mailbox = 1; + op_mod = 2; /* write outbox, any->reset */ + } else + mailbox = NULL; + } + + err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, + (!!is_ee << 24) | num, op_mod, + op[cur][next], CMD_TIME_CLASS_C); + + if (0 && mailbox) { + int i; + mthca_dbg(dev, "Dumping QP context:\n"); + printk(" %08x\n", be32_to_cpup(mailbox->buf)); + for (i = 0; i < 0x100 / 4; ++i) { + if (i % 8 == 0) + printk("[%02x] ", i * 4); + printk(" %08x", + be32_to_cpu(((__be32 *) mailbox->buf)[i + 2])); + if ((i + 1) % 8 == 0) + printk("\n"); + } + } + + if (my_mailbox) + mthca_free_mailbox(dev, mailbox); + } else { + if (0) { + int i; + mthca_dbg(dev, "Dumping QP context:\n"); + printk(" opt param mask: %08x\n", be32_to_cpup(mailbox->buf)); + for (i = 0; i < 0x100 / 4; ++i) { + if (i % 8 == 0) + printk(" [%02x] ", i * 4); + printk(" %08x", + be32_to_cpu(((__be32 *) mailbox->buf)[i + 2])); + if ((i + 1) % 8 == 0) + printk("\n"); + } + } + + err = mthca_cmd(dev, mailbox->dma, optmask | (!!is_ee << 24) | num, + op_mod, op[cur][next], CMD_TIME_CLASS_C); + } + + return err; +} + +int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, + struct mthca_mailbox *mailbox) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, (!!is_ee << 24) | num, 0, + CMD_QUERY_QPEE, CMD_TIME_CLASS_A); +} + +int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn) +{ + u8 op_mod; + + switch (type) { + case IB_QPT_SMI: + op_mod = 0; + break; + case IB_QPT_GSI: + op_mod = 1; + break; + case IB_QPT_RAW_IPV6: + op_mod = 2; + break; + case IB_QPT_RAW_ETHERTYPE: + op_mod = 3; + break; + default: + return -EINVAL; + } + + return mthca_cmd(dev, 0, qpn, op_mod, CMD_CONF_SPECIAL_QP, + CMD_TIME_CLASS_B); +} + +int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, + void *in_mad, void *response_mad) +{ + struct mthca_mailbox *inmailbox, *outmailbox; + void *inbox; + int err; + u32 in_modifier = port; + u8 op_modifier = 0; + +#define MAD_IFC_BOX_SIZE 0x400 +#define MAD_IFC_MY_QPN_OFFSET 0x100 +#define MAD_IFC_RQPN_OFFSET 0x108 +#define MAD_IFC_SL_OFFSET 0x10c +#define MAD_IFC_G_PATH_OFFSET 0x10d +#define MAD_IFC_RLID_OFFSET 0x10e +#define MAD_IFC_PKEY_OFFSET 0x112 +#define MAD_IFC_GRH_OFFSET 0x140 + + inmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(inmailbox)) + return PTR_ERR(inmailbox); + inbox = inmailbox->buf; + + outmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(outmailbox)) { + mthca_free_mailbox(dev, inmailbox); + return PTR_ERR(outmailbox); + } + + memcpy(inbox, in_mad, 256); + + /* + * Key check traps can't be generated unless we have in_wc to + * tell us where to send the trap. + */ + if (ignore_mkey || !in_wc) + op_modifier |= 0x1; + if (ignore_bkey || !in_wc) + op_modifier |= 0x2; + + if (in_wc) { + u8 val; + + memset(inbox + 256, 0, 256); + + MTHCA_PUT(inbox, in_wc->qp->qp_num, MAD_IFC_MY_QPN_OFFSET); + MTHCA_PUT(inbox, in_wc->src_qp, MAD_IFC_RQPN_OFFSET); + + val = in_wc->sl << 4; + MTHCA_PUT(inbox, val, MAD_IFC_SL_OFFSET); + + val = in_wc->dlid_path_bits | + (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0); + MTHCA_PUT(inbox, val, MAD_IFC_G_PATH_OFFSET); + + MTHCA_PUT(inbox, in_wc->slid, MAD_IFC_RLID_OFFSET); + MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET); + + if (in_grh) + memcpy(inbox + MAD_IFC_GRH_OFFSET, in_grh, 40); + + op_modifier |= 0x4; + + in_modifier |= in_wc->slid << 16; + } + + err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma, + in_modifier, op_modifier, + CMD_MAD_IFC, CMD_TIME_CLASS_C); + + if (!err) + memcpy(response_mad, outmailbox->buf, 256); + + mthca_free_mailbox(dev, inmailbox); + mthca_free_mailbox(dev, outmailbox); + return err; +} + +int mthca_READ_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, index, 0, + CMD_READ_MGM, CMD_TIME_CLASS_A); +} + +int mthca_WRITE_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox) +{ + return mthca_cmd(dev, mailbox->dma, index, 0, CMD_WRITE_MGM, + CMD_TIME_CLASS_A); +} + +int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + u16 *hash) +{ + u64 imm; + int err; + + err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH, + CMD_TIME_CLASS_A); + + *hash = imm; + return err; +} + +int mthca_NOP(struct mthca_dev *dev) +{ + return mthca_cmd(dev, 0, 0x1f, 0, CMD_NOP, msecs_to_jiffies(100)); +} diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h new file mode 100644 index 00000000..f952244c --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -0,0 +1,325 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MTHCA_CMD_H +#define MTHCA_CMD_H + +#include + +#define MTHCA_MAILBOX_SIZE 4096 + +enum { + /* command completed successfully: */ + MTHCA_CMD_STAT_OK = 0x00, + /* Internal error (such as a bus error) occurred while processing command: */ + MTHCA_CMD_STAT_INTERNAL_ERR = 0x01, + /* Operation/command not supported or opcode modifier not supported: */ + MTHCA_CMD_STAT_BAD_OP = 0x02, + /* Parameter not supported or parameter out of range: */ + MTHCA_CMD_STAT_BAD_PARAM = 0x03, + /* System not enabled or bad system state: */ + MTHCA_CMD_STAT_BAD_SYS_STATE = 0x04, + /* Attempt to access reserved or unallocaterd resource: */ + MTHCA_CMD_STAT_BAD_RESOURCE = 0x05, + /* Requested resource is currently executing a command, or is otherwise busy: */ + MTHCA_CMD_STAT_RESOURCE_BUSY = 0x06, + /* memory error: */ + MTHCA_CMD_STAT_DDR_MEM_ERR = 0x07, + /* Required capability exceeds device limits: */ + MTHCA_CMD_STAT_EXCEED_LIM = 0x08, + /* Resource is not in the appropriate state or ownership: */ + MTHCA_CMD_STAT_BAD_RES_STATE = 0x09, + /* Index out of range: */ + MTHCA_CMD_STAT_BAD_INDEX = 0x0a, + /* FW image corrupted: */ + MTHCA_CMD_STAT_BAD_NVMEM = 0x0b, + /* Attempt to modify a QP/EE which is not in the presumed state: */ + MTHCA_CMD_STAT_BAD_QPEE_STATE = 0x10, + /* Bad segment parameters (Address/Size): */ + MTHCA_CMD_STAT_BAD_SEG_PARAM = 0x20, + /* Memory Region has Memory Windows bound to: */ + MTHCA_CMD_STAT_REG_BOUND = 0x21, + /* HCA local attached memory not present: */ + MTHCA_CMD_STAT_LAM_NOT_PRE = 0x22, + /* Bad management packet (silently discarded): */ + MTHCA_CMD_STAT_BAD_PKT = 0x30, + /* More outstanding CQEs in CQ than new CQ size: */ + MTHCA_CMD_STAT_BAD_SIZE = 0x40 +}; + +enum { + MTHCA_TRANS_INVALID = 0, + MTHCA_TRANS_RST2INIT, + MTHCA_TRANS_INIT2INIT, + MTHCA_TRANS_INIT2RTR, + MTHCA_TRANS_RTR2RTS, + MTHCA_TRANS_RTS2RTS, + MTHCA_TRANS_SQERR2RTS, + MTHCA_TRANS_ANY2ERR, + MTHCA_TRANS_RTS2SQD, + MTHCA_TRANS_SQD2SQD, + MTHCA_TRANS_SQD2RTS, + MTHCA_TRANS_ANY2RST, +}; + +enum { + DEV_LIM_FLAG_RC = 1 << 0, + DEV_LIM_FLAG_UC = 1 << 1, + DEV_LIM_FLAG_UD = 1 << 2, + DEV_LIM_FLAG_RD = 1 << 3, + DEV_LIM_FLAG_RAW_IPV6 = 1 << 4, + DEV_LIM_FLAG_RAW_ETHER = 1 << 5, + DEV_LIM_FLAG_SRQ = 1 << 6, + DEV_LIM_FLAG_IPOIB_CSUM = 1 << 7, + DEV_LIM_FLAG_BAD_PKEY_CNTR = 1 << 8, + DEV_LIM_FLAG_BAD_QKEY_CNTR = 1 << 9, + DEV_LIM_FLAG_MW = 1 << 16, + DEV_LIM_FLAG_AUTO_PATH_MIG = 1 << 17, + DEV_LIM_FLAG_ATOMIC = 1 << 18, + DEV_LIM_FLAG_RAW_MULTI = 1 << 19, + DEV_LIM_FLAG_UD_AV_PORT_ENFORCE = 1 << 20, + DEV_LIM_FLAG_UD_MULTI = 1 << 21, +}; + +struct mthca_mailbox { + dma_addr_t dma; + void *buf; +}; + +struct mthca_dev_lim { + int max_srq_sz; + int max_qp_sz; + int reserved_qps; + int max_qps; + int reserved_srqs; + int max_srqs; + int reserved_eecs; + int max_eecs; + int max_cq_sz; + int reserved_cqs; + int max_cqs; + int max_mpts; + int reserved_eqs; + int max_eqs; + int reserved_mtts; + int max_mrw_sz; + int reserved_mrws; + int max_mtt_seg; + int max_requester_per_qp; + int max_responder_per_qp; + int max_rdma_global; + int local_ca_ack_delay; + int max_mtu; + int max_port_width; + int max_vl; + int num_ports; + int max_gids; + u16 stat_rate_support; + int max_pkeys; + u32 flags; + int reserved_uars; + int uar_size; + int min_page_sz; + int max_sg; + int max_desc_sz; + int max_qp_per_mcg; + int reserved_mgms; + int max_mcgs; + int reserved_pds; + int max_pds; + int reserved_rdds; + int max_rdds; + int eec_entry_sz; + int qpc_entry_sz; + int eeec_entry_sz; + int eqpc_entry_sz; + int eqc_entry_sz; + int cqc_entry_sz; + int srq_entry_sz; + int uar_scratch_entry_sz; + int mpt_entry_sz; + union { + struct { + int max_avs; + } tavor; + struct { + int resize_srq; + int max_pbl_sz; + u8 bmme_flags; + u32 reserved_lkey; + int lam_required; + u64 max_icm_sz; + } arbel; + } hca; +}; + +struct mthca_adapter { + u32 vendor_id; + u32 device_id; + u32 revision_id; + char board_id[MTHCA_BOARD_ID_LEN]; + u8 inta_pin; +}; + +struct mthca_init_hca_param { + u64 qpc_base; + u64 eec_base; + u64 srqc_base; + u64 cqc_base; + u64 eqpc_base; + u64 eeec_base; + u64 eqc_base; + u64 rdb_base; + u64 mc_base; + u64 mpt_base; + u64 mtt_base; + u64 uar_scratch_base; + u64 uarc_base; + u16 log_mc_entry_sz; + u16 mc_hash_sz; + u8 log_num_qps; + u8 log_num_eecs; + u8 log_num_srqs; + u8 log_num_cqs; + u8 log_num_eqs; + u8 log_mc_table_sz; + u8 mtt_seg_sz; + u8 log_mpt_sz; + u8 log_uar_sz; + u8 log_uarc_sz; +}; + +struct mthca_init_ib_param { + int port_width; + int vl_cap; + int mtu_cap; + u16 gid_cap; + u16 pkey_cap; + int set_guid0; + u64 guid0; + int set_node_guid; + u64 node_guid; + int set_si_guid; + u64 si_guid; +}; + +struct mthca_set_ib_param { + int set_si_guid; + int reset_qkey_viol; + u64 si_guid; + u32 cap_mask; +}; + +int mthca_cmd_init(struct mthca_dev *dev); +void mthca_cmd_cleanup(struct mthca_dev *dev); +int mthca_cmd_use_events(struct mthca_dev *dev); +void mthca_cmd_use_polling(struct mthca_dev *dev); +void mthca_cmd_event(struct mthca_dev *dev, u16 token, + u8 status, u64 out_param); + +struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, + gfp_t gfp_mask); +void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox); + +int mthca_SYS_EN(struct mthca_dev *dev); +int mthca_SYS_DIS(struct mthca_dev *dev); +int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm); +int mthca_UNMAP_FA(struct mthca_dev *dev); +int mthca_RUN_FW(struct mthca_dev *dev); +int mthca_QUERY_FW(struct mthca_dev *dev); +int mthca_ENABLE_LAM(struct mthca_dev *dev); +int mthca_DISABLE_LAM(struct mthca_dev *dev); +int mthca_QUERY_DDR(struct mthca_dev *dev); +int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, + struct mthca_dev_lim *dev_lim); +int mthca_QUERY_ADAPTER(struct mthca_dev *dev, + struct mthca_adapter *adapter); +int mthca_INIT_HCA(struct mthca_dev *dev, + struct mthca_init_hca_param *param); +int mthca_INIT_IB(struct mthca_dev *dev, + struct mthca_init_ib_param *param, + int port); +int mthca_CLOSE_IB(struct mthca_dev *dev, int port); +int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic); +int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, + int port); +int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt); +int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt); +int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count); +int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm); +int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev); +int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages); +int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int mpt_index); +int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int mpt_index); +int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int num_mtt); +int mthca_SYNC_TPT(struct mthca_dev *dev); +int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap, + int eq_num); +int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int eq_num); +int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int eq_num); +int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int cq_num); +int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int cq_num); +int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size); +int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num); +int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num); +int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num, + struct mthca_mailbox *mailbox); +int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit); +int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur, + enum ib_qp_state next, u32 num, int is_ee, + struct mthca_mailbox *mailbox, u32 optmask); +int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, + struct mthca_mailbox *mailbox); +int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn); +int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, + void *in_mad, void *response_mad); +int mthca_READ_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox); +int mthca_WRITE_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox); +int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + u16 *hash); +int mthca_NOP(struct mthca_dev *dev); + +#endif /* MTHCA_CMD_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h new file mode 100644 index 00000000..155bc663 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MTHCA_CONFIG_REG_H +#define MTHCA_CONFIG_REG_H + +#define MTHCA_HCR_BASE 0x80680 +#define MTHCA_HCR_SIZE 0x0001c +#define MTHCA_ECR_BASE 0x80700 +#define MTHCA_ECR_SIZE 0x00008 +#define MTHCA_ECR_CLR_BASE 0x80708 +#define MTHCA_ECR_CLR_SIZE 0x00008 +#define MTHCA_MAP_ECR_SIZE (MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE) +#define MTHCA_CLR_INT_BASE 0xf00d8 +#define MTHCA_CLR_INT_SIZE 0x00008 +#define MTHCA_EQ_SET_CI_SIZE (8 * 32) + +#endif /* MTHCA_CONFIG_REG_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c new file mode 100644 index 00000000..40ba8333 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -0,0 +1,984 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include + +#include + +#include "mthca_dev.h" +#include "mthca_cmd.h" +#include "mthca_memfree.h" + +enum { + MTHCA_MAX_DIRECT_CQ_SIZE = 4 * PAGE_SIZE +}; + +enum { + MTHCA_CQ_ENTRY_SIZE = 0x20 +}; + +enum { + MTHCA_ATOMIC_BYTE_LEN = 8 +}; + +/* + * Must be packed because start is 64 bits but only aligned to 32 bits. + */ +struct mthca_cq_context { + __be32 flags; + __be64 start; + __be32 logsize_usrpage; + __be32 error_eqn; /* Tavor only */ + __be32 comp_eqn; + __be32 pd; + __be32 lkey; + __be32 last_notified_index; + __be32 solicit_producer_index; + __be32 consumer_index; + __be32 producer_index; + __be32 cqn; + __be32 ci_db; /* Arbel only */ + __be32 state_db; /* Arbel only */ + u32 reserved; +} __attribute__((packed)); + +#define MTHCA_CQ_STATUS_OK ( 0 << 28) +#define MTHCA_CQ_STATUS_OVERFLOW ( 9 << 28) +#define MTHCA_CQ_STATUS_WRITE_FAIL (10 << 28) +#define MTHCA_CQ_FLAG_TR ( 1 << 18) +#define MTHCA_CQ_FLAG_OI ( 1 << 17) +#define MTHCA_CQ_STATE_DISARMED ( 0 << 8) +#define MTHCA_CQ_STATE_ARMED ( 1 << 8) +#define MTHCA_CQ_STATE_ARMED_SOL ( 4 << 8) +#define MTHCA_EQ_STATE_FIRED (10 << 8) + +enum { + MTHCA_ERROR_CQE_OPCODE_MASK = 0xfe +}; + +enum { + SYNDROME_LOCAL_LENGTH_ERR = 0x01, + SYNDROME_LOCAL_QP_OP_ERR = 0x02, + SYNDROME_LOCAL_EEC_OP_ERR = 0x03, + SYNDROME_LOCAL_PROT_ERR = 0x04, + SYNDROME_WR_FLUSH_ERR = 0x05, + SYNDROME_MW_BIND_ERR = 0x06, + SYNDROME_BAD_RESP_ERR = 0x10, + SYNDROME_LOCAL_ACCESS_ERR = 0x11, + SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12, + SYNDROME_REMOTE_ACCESS_ERR = 0x13, + SYNDROME_REMOTE_OP_ERR = 0x14, + SYNDROME_RETRY_EXC_ERR = 0x15, + SYNDROME_RNR_RETRY_EXC_ERR = 0x16, + SYNDROME_LOCAL_RDD_VIOL_ERR = 0x20, + SYNDROME_REMOTE_INVAL_RD_REQ_ERR = 0x21, + SYNDROME_REMOTE_ABORTED_ERR = 0x22, + SYNDROME_INVAL_EECN_ERR = 0x23, + SYNDROME_INVAL_EEC_STATE_ERR = 0x24 +}; + +struct mthca_cqe { + __be32 my_qpn; + __be32 my_ee; + __be32 rqpn; + u8 sl_ipok; + u8 g_mlpath; + __be16 rlid; + __be32 imm_etype_pkey_eec; + __be32 byte_cnt; + __be32 wqe; + u8 opcode; + u8 is_send; + u8 reserved; + u8 owner; +}; + +struct mthca_err_cqe { + __be32 my_qpn; + u32 reserved1[3]; + u8 syndrome; + u8 vendor_err; + __be16 db_cnt; + u32 reserved2; + __be32 wqe; + u8 opcode; + u8 reserved3[2]; + u8 owner; +}; + +#define MTHCA_CQ_ENTRY_OWNER_SW (0 << 7) +#define MTHCA_CQ_ENTRY_OWNER_HW (1 << 7) + +#define MTHCA_TAVOR_CQ_DB_INC_CI (1 << 24) +#define MTHCA_TAVOR_CQ_DB_REQ_NOT (2 << 24) +#define MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL (3 << 24) +#define MTHCA_TAVOR_CQ_DB_SET_CI (4 << 24) +#define MTHCA_TAVOR_CQ_DB_REQ_NOT_MULT (5 << 24) + +#define MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL (1 << 24) +#define MTHCA_ARBEL_CQ_DB_REQ_NOT (2 << 24) +#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24) + +static inline struct mthca_cqe *get_cqe_from_buf(struct mthca_cq_buf *buf, + int entry) +{ + if (buf->is_direct) + return buf->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE); + else + return buf->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf + + (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE; +} + +static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry) +{ + return get_cqe_from_buf(&cq->buf, entry); +} + +static inline struct mthca_cqe *cqe_sw(struct mthca_cqe *cqe) +{ + return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe; +} + +static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq) +{ + return cqe_sw(get_cqe(cq, cq->cons_index & cq->ibcq.cqe)); +} + +static inline void set_cqe_hw(struct mthca_cqe *cqe) +{ + cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW; +} + +static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr) +{ + __be32 *cqe = cqe_ptr; + + (void) cqe; /* avoid warning if mthca_dbg compiled away... */ + mthca_dbg(dev, "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n", + be32_to_cpu(cqe[0]), be32_to_cpu(cqe[1]), be32_to_cpu(cqe[2]), + be32_to_cpu(cqe[3]), be32_to_cpu(cqe[4]), be32_to_cpu(cqe[5]), + be32_to_cpu(cqe[6]), be32_to_cpu(cqe[7])); +} + +/* + * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index + * should be correct before calling update_cons_index(). + */ +static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq, + int incr) +{ + if (mthca_is_memfree(dev)) { + *cq->set_ci_db = cpu_to_be32(cq->cons_index); + wmb(); + } else { + mthca_write64(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn, incr - 1, + dev->kar + MTHCA_CQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + /* + * Make sure doorbells don't leak out of CQ spinlock + * and reach the HCA out of order: + */ + mmiowb(); + } +} + +void mthca_cq_completion(struct mthca_dev *dev, u32 cqn) +{ + struct mthca_cq *cq; + + cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1)); + + if (!cq) { + mthca_warn(dev, "Completion event for bogus CQ %08x\n", cqn); + return; + } + + ++cq->arm_sn; + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} + +void mthca_cq_event(struct mthca_dev *dev, u32 cqn, + enum ib_event_type event_type) +{ + struct mthca_cq *cq; + struct ib_event event; + + spin_lock(&dev->cq_table.lock); + + cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1)); + if (cq) + ++cq->refcount; + + spin_unlock(&dev->cq_table.lock); + + if (!cq) { + mthca_warn(dev, "Async event for bogus CQ %08x\n", cqn); + return; + } + + event.device = &dev->ib_dev; + event.event = event_type; + event.element.cq = &cq->ibcq; + if (cq->ibcq.event_handler) + cq->ibcq.event_handler(&event, cq->ibcq.cq_context); + + spin_lock(&dev->cq_table.lock); + if (!--cq->refcount) + wake_up(&cq->wait); + spin_unlock(&dev->cq_table.lock); +} + +static inline int is_recv_cqe(struct mthca_cqe *cqe) +{ + if ((cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) == + MTHCA_ERROR_CQE_OPCODE_MASK) + return !(cqe->opcode & 0x01); + else + return !(cqe->is_send & 0x80); +} + +void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn, + struct mthca_srq *srq) +{ + struct mthca_cqe *cqe; + u32 prod_index; + int i, nfreed = 0; + + spin_lock_irq(&cq->lock); + + /* + * First we need to find the current producer index, so we + * know where to start cleaning from. It doesn't matter if HW + * adds new entries after this loop -- the QP we're worried + * about is already in RESET, so the new entries won't come + * from our QP and therefore don't need to be checked. + */ + for (prod_index = cq->cons_index; + cqe_sw(get_cqe(cq, prod_index & cq->ibcq.cqe)); + ++prod_index) + if (prod_index == cq->cons_index + cq->ibcq.cqe) + break; + + if (0) + mthca_dbg(dev, "Cleaning QPN %06x from CQN %06x; ci %d, pi %d\n", + qpn, cq->cqn, cq->cons_index, prod_index); + + /* + * Now sweep backwards through the CQ, removing CQ entries + * that match our QP by copying older entries on top of them. + */ + while ((int) --prod_index - (int) cq->cons_index >= 0) { + cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); + if (cqe->my_qpn == cpu_to_be32(qpn)) { + if (srq && is_recv_cqe(cqe)) + mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe)); + ++nfreed; + } else if (nfreed) + memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe), + cqe, MTHCA_CQ_ENTRY_SIZE); + } + + if (nfreed) { + for (i = 0; i < nfreed; ++i) + set_cqe_hw(get_cqe(cq, (cq->cons_index + i) & cq->ibcq.cqe)); + wmb(); + cq->cons_index += nfreed; + update_cons_index(dev, cq, nfreed); + } + + spin_unlock_irq(&cq->lock); +} + +void mthca_cq_resize_copy_cqes(struct mthca_cq *cq) +{ + int i; + + /* + * In Tavor mode, the hardware keeps the consumer and producer + * indices mod the CQ size. Since we might be making the CQ + * bigger, we need to deal with the case where the producer + * index wrapped around before the CQ was resized. + */ + if (!mthca_is_memfree(to_mdev(cq->ibcq.device)) && + cq->ibcq.cqe < cq->resize_buf->cqe) { + cq->cons_index &= cq->ibcq.cqe; + if (cqe_sw(get_cqe(cq, cq->ibcq.cqe))) + cq->cons_index -= cq->ibcq.cqe + 1; + } + + for (i = cq->cons_index; cqe_sw(get_cqe(cq, i & cq->ibcq.cqe)); ++i) + memcpy(get_cqe_from_buf(&cq->resize_buf->buf, + i & cq->resize_buf->cqe), + get_cqe(cq, i & cq->ibcq.cqe), MTHCA_CQ_ENTRY_SIZE); +} + +int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent) +{ + int ret; + int i; + + ret = mthca_buf_alloc(dev, nent * MTHCA_CQ_ENTRY_SIZE, + MTHCA_MAX_DIRECT_CQ_SIZE, + &buf->queue, &buf->is_direct, + &dev->driver_pd, 1, &buf->mr); + if (ret) + return ret; + + for (i = 0; i < nent; ++i) + set_cqe_hw(get_cqe_from_buf(buf, i)); + + return 0; +} + +void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe) +{ + mthca_buf_free(dev, (cqe + 1) * MTHCA_CQ_ENTRY_SIZE, &buf->queue, + buf->is_direct, &buf->mr); +} + +static void handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, + struct mthca_qp *qp, int wqe_index, int is_send, + struct mthca_err_cqe *cqe, + struct ib_wc *entry, int *free_cqe) +{ + int dbd; + __be32 new_wqe; + + if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) { + mthca_dbg(dev, "local QP operation err " + "(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n", + be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe), + cq->cqn, cq->cons_index); + dump_cqe(dev, cqe); + } + + /* + * For completions in error, only work request ID, status, vendor error + * (and freed resource count for RD) have to be set. + */ + switch (cqe->syndrome) { + case SYNDROME_LOCAL_LENGTH_ERR: + entry->status = IB_WC_LOC_LEN_ERR; + break; + case SYNDROME_LOCAL_QP_OP_ERR: + entry->status = IB_WC_LOC_QP_OP_ERR; + break; + case SYNDROME_LOCAL_EEC_OP_ERR: + entry->status = IB_WC_LOC_EEC_OP_ERR; + break; + case SYNDROME_LOCAL_PROT_ERR: + entry->status = IB_WC_LOC_PROT_ERR; + break; + case SYNDROME_WR_FLUSH_ERR: + entry->status = IB_WC_WR_FLUSH_ERR; + break; + case SYNDROME_MW_BIND_ERR: + entry->status = IB_WC_MW_BIND_ERR; + break; + case SYNDROME_BAD_RESP_ERR: + entry->status = IB_WC_BAD_RESP_ERR; + break; + case SYNDROME_LOCAL_ACCESS_ERR: + entry->status = IB_WC_LOC_ACCESS_ERR; + break; + case SYNDROME_REMOTE_INVAL_REQ_ERR: + entry->status = IB_WC_REM_INV_REQ_ERR; + break; + case SYNDROME_REMOTE_ACCESS_ERR: + entry->status = IB_WC_REM_ACCESS_ERR; + break; + case SYNDROME_REMOTE_OP_ERR: + entry->status = IB_WC_REM_OP_ERR; + break; + case SYNDROME_RETRY_EXC_ERR: + entry->status = IB_WC_RETRY_EXC_ERR; + break; + case SYNDROME_RNR_RETRY_EXC_ERR: + entry->status = IB_WC_RNR_RETRY_EXC_ERR; + break; + case SYNDROME_LOCAL_RDD_VIOL_ERR: + entry->status = IB_WC_LOC_RDD_VIOL_ERR; + break; + case SYNDROME_REMOTE_INVAL_RD_REQ_ERR: + entry->status = IB_WC_REM_INV_RD_REQ_ERR; + break; + case SYNDROME_REMOTE_ABORTED_ERR: + entry->status = IB_WC_REM_ABORT_ERR; + break; + case SYNDROME_INVAL_EECN_ERR: + entry->status = IB_WC_INV_EECN_ERR; + break; + case SYNDROME_INVAL_EEC_STATE_ERR: + entry->status = IB_WC_INV_EEC_STATE_ERR; + break; + default: + entry->status = IB_WC_GENERAL_ERR; + break; + } + + entry->vendor_err = cqe->vendor_err; + + /* + * Mem-free HCAs always generate one CQE per WQE, even in the + * error case, so we don't have to check the doorbell count, etc. + */ + if (mthca_is_memfree(dev)) + return; + + mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe); + + /* + * If we're at the end of the WQE chain, or we've used up our + * doorbell count, free the CQE. Otherwise just update it for + * the next poll operation. + */ + if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd)) + return; + + be16_add_cpu(&cqe->db_cnt, -dbd); + cqe->wqe = new_wqe; + cqe->syndrome = SYNDROME_WR_FLUSH_ERR; + + *free_cqe = 0; +} + +static inline int mthca_poll_one(struct mthca_dev *dev, + struct mthca_cq *cq, + struct mthca_qp **cur_qp, + int *freed, + struct ib_wc *entry) +{ + struct mthca_wq *wq; + struct mthca_cqe *cqe; + int wqe_index; + int is_error; + int is_send; + int free_cqe = 1; + int err = 0; + u16 checksum; + + cqe = next_cqe_sw(cq); + if (!cqe) + return -EAGAIN; + + /* + * Make sure we read CQ entry contents after we've checked the + * ownership bit. + */ + rmb(); + + if (0) { + mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n", + cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn), + be32_to_cpu(cqe->wqe)); + dump_cqe(dev, cqe); + } + + is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) == + MTHCA_ERROR_CQE_OPCODE_MASK; + is_send = is_error ? cqe->opcode & 0x01 : cqe->is_send & 0x80; + + if (!*cur_qp || be32_to_cpu(cqe->my_qpn) != (*cur_qp)->qpn) { + /* + * We do not have to take the QP table lock here, + * because CQs will be locked while QPs are removed + * from the table. + */ + *cur_qp = mthca_array_get(&dev->qp_table.qp, + be32_to_cpu(cqe->my_qpn) & + (dev->limits.num_qps - 1)); + if (!*cur_qp) { + mthca_warn(dev, "CQ entry for unknown QP %06x\n", + be32_to_cpu(cqe->my_qpn) & 0xffffff); + err = -EINVAL; + goto out; + } + } + + entry->qp = &(*cur_qp)->ibqp; + + if (is_send) { + wq = &(*cur_qp)->sq; + wqe_index = ((be32_to_cpu(cqe->wqe) - (*cur_qp)->send_wqe_offset) + >> wq->wqe_shift); + entry->wr_id = (*cur_qp)->wrid[wqe_index + + (*cur_qp)->rq.max]; + } else if ((*cur_qp)->ibqp.srq) { + struct mthca_srq *srq = to_msrq((*cur_qp)->ibqp.srq); + u32 wqe = be32_to_cpu(cqe->wqe); + wq = NULL; + wqe_index = wqe >> srq->wqe_shift; + entry->wr_id = srq->wrid[wqe_index]; + mthca_free_srq_wqe(srq, wqe); + } else { + s32 wqe; + wq = &(*cur_qp)->rq; + wqe = be32_to_cpu(cqe->wqe); + wqe_index = wqe >> wq->wqe_shift; + /* + * WQE addr == base - 1 might be reported in receive completion + * with error instead of (rq size - 1) by Sinai FW 1.0.800 and + * Arbel FW 5.1.400. This bug should be fixed in later FW revs. + */ + if (unlikely(wqe_index < 0)) + wqe_index = wq->max - 1; + entry->wr_id = (*cur_qp)->wrid[wqe_index]; + } + + if (wq) { + if (wq->last_comp < wqe_index) + wq->tail += wqe_index - wq->last_comp; + else + wq->tail += wqe_index + wq->max - wq->last_comp; + + wq->last_comp = wqe_index; + } + + if (is_error) { + handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send, + (struct mthca_err_cqe *) cqe, + entry, &free_cqe); + goto out; + } + + if (is_send) { + entry->wc_flags = 0; + switch (cqe->opcode) { + case MTHCA_OPCODE_RDMA_WRITE: + entry->opcode = IB_WC_RDMA_WRITE; + break; + case MTHCA_OPCODE_RDMA_WRITE_IMM: + entry->opcode = IB_WC_RDMA_WRITE; + entry->wc_flags |= IB_WC_WITH_IMM; + break; + case MTHCA_OPCODE_SEND: + entry->opcode = IB_WC_SEND; + break; + case MTHCA_OPCODE_SEND_IMM: + entry->opcode = IB_WC_SEND; + entry->wc_flags |= IB_WC_WITH_IMM; + break; + case MTHCA_OPCODE_RDMA_READ: + entry->opcode = IB_WC_RDMA_READ; + entry->byte_len = be32_to_cpu(cqe->byte_cnt); + break; + case MTHCA_OPCODE_ATOMIC_CS: + entry->opcode = IB_WC_COMP_SWAP; + entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; + break; + case MTHCA_OPCODE_ATOMIC_FA: + entry->opcode = IB_WC_FETCH_ADD; + entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; + break; + case MTHCA_OPCODE_BIND_MW: + entry->opcode = IB_WC_BIND_MW; + break; + default: + entry->opcode = MTHCA_OPCODE_INVALID; + break; + } + } else { + entry->byte_len = be32_to_cpu(cqe->byte_cnt); + switch (cqe->opcode & 0x1f) { + case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE: + case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE: + entry->wc_flags = IB_WC_WITH_IMM; + entry->ex.imm_data = cqe->imm_etype_pkey_eec; + entry->opcode = IB_WC_RECV; + break; + case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: + case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: + entry->wc_flags = IB_WC_WITH_IMM; + entry->ex.imm_data = cqe->imm_etype_pkey_eec; + entry->opcode = IB_WC_RECV_RDMA_WITH_IMM; + break; + default: + entry->wc_flags = 0; + entry->opcode = IB_WC_RECV; + break; + } + entry->slid = be16_to_cpu(cqe->rlid); + entry->sl = cqe->sl_ipok >> 4; + entry->src_qp = be32_to_cpu(cqe->rqpn) & 0xffffff; + entry->dlid_path_bits = cqe->g_mlpath & 0x7f; + entry->pkey_index = be32_to_cpu(cqe->imm_etype_pkey_eec) >> 16; + entry->wc_flags |= cqe->g_mlpath & 0x80 ? IB_WC_GRH : 0; + checksum = (be32_to_cpu(cqe->rqpn) >> 24) | + ((be32_to_cpu(cqe->my_ee) >> 16) & 0xff00); + entry->wc_flags |= (cqe->sl_ipok & 1 && checksum == 0xffff) ? + IB_WC_IP_CSUM_OK : 0; + } + + entry->status = IB_WC_SUCCESS; + + out: + if (likely(free_cqe)) { + set_cqe_hw(cqe); + ++(*freed); + ++cq->cons_index; + } + + return err; +} + +int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, + struct ib_wc *entry) +{ + struct mthca_dev *dev = to_mdev(ibcq->device); + struct mthca_cq *cq = to_mcq(ibcq); + struct mthca_qp *qp = NULL; + unsigned long flags; + int err = 0; + int freed = 0; + int npolled; + + spin_lock_irqsave(&cq->lock, flags); + + npolled = 0; +repoll: + while (npolled < num_entries) { + err = mthca_poll_one(dev, cq, &qp, + &freed, entry + npolled); + if (err) + break; + ++npolled; + } + + if (freed) { + wmb(); + update_cons_index(dev, cq, freed); + } + + /* + * If a CQ resize is in progress and we discovered that the + * old buffer is empty, then peek in the new buffer, and if + * it's not empty, switch to the new buffer and continue + * polling there. + */ + if (unlikely(err == -EAGAIN && cq->resize_buf && + cq->resize_buf->state == CQ_RESIZE_READY)) { + /* + * In Tavor mode, the hardware keeps the producer + * index modulo the CQ size. Since we might be making + * the CQ bigger, we need to mask our consumer index + * using the size of the old CQ buffer before looking + * in the new CQ buffer. + */ + if (!mthca_is_memfree(dev)) + cq->cons_index &= cq->ibcq.cqe; + + if (cqe_sw(get_cqe_from_buf(&cq->resize_buf->buf, + cq->cons_index & cq->resize_buf->cqe))) { + struct mthca_cq_buf tbuf; + int tcqe; + + tbuf = cq->buf; + tcqe = cq->ibcq.cqe; + cq->buf = cq->resize_buf->buf; + cq->ibcq.cqe = cq->resize_buf->cqe; + + cq->resize_buf->buf = tbuf; + cq->resize_buf->cqe = tcqe; + cq->resize_buf->state = CQ_RESIZE_SWAPPED; + + goto repoll; + } + } + + spin_unlock_irqrestore(&cq->lock, flags); + + return err == 0 || err == -EAGAIN ? npolled : err; +} + +int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags) +{ + u32 dbhi = ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? + MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL : + MTHCA_TAVOR_CQ_DB_REQ_NOT) | + to_mcq(cq)->cqn; + + mthca_write64(dbhi, 0xffffffff, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock)); + + return 0; +} + +int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct mthca_cq *cq = to_mcq(ibcq); + __be32 db_rec[2]; + u32 dbhi; + u32 sn = cq->arm_sn & 3; + + db_rec[0] = cpu_to_be32(cq->cons_index); + db_rec[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) | + ((flags & IB_CQ_SOLICITED_MASK) == + IB_CQ_SOLICITED ? 1 : 2)); + + mthca_write_db_rec(db_rec, cq->arm_db); + + /* + * Make sure that the doorbell record in host memory is + * written before ringing the doorbell via PCI MMIO. + */ + wmb(); + + dbhi = (sn << 28) | + ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? + MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL : + MTHCA_ARBEL_CQ_DB_REQ_NOT) | cq->cqn; + + mthca_write64(dbhi, cq->cons_index, + to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock)); + + return 0; +} + +int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_ucontext *ctx, u32 pdn, + struct mthca_cq *cq) +{ + struct mthca_mailbox *mailbox; + struct mthca_cq_context *cq_context; + int err = -ENOMEM; + + cq->ibcq.cqe = nent - 1; + cq->is_kernel = !ctx; + + cq->cqn = mthca_alloc(&dev->cq_table.alloc); + if (cq->cqn == -1) + return -ENOMEM; + + if (mthca_is_memfree(dev)) { + err = mthca_table_get(dev, dev->cq_table.table, cq->cqn); + if (err) + goto err_out; + + if (cq->is_kernel) { + cq->arm_sn = 1; + + err = -ENOMEM; + + cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, + cq->cqn, &cq->set_ci_db); + if (cq->set_ci_db_index < 0) + goto err_out_icm; + + cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM, + cq->cqn, &cq->arm_db); + if (cq->arm_db_index < 0) + goto err_out_ci; + } + } + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + goto err_out_arm; + + cq_context = mailbox->buf; + + if (cq->is_kernel) { + err = mthca_alloc_cq_buf(dev, &cq->buf, nent); + if (err) + goto err_out_mailbox; + } + + spin_lock_init(&cq->lock); + cq->refcount = 1; + init_waitqueue_head(&cq->wait); + mutex_init(&cq->mutex); + + memset(cq_context, 0, sizeof *cq_context); + cq_context->flags = cpu_to_be32(MTHCA_CQ_STATUS_OK | + MTHCA_CQ_STATE_DISARMED | + MTHCA_CQ_FLAG_TR); + cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24); + if (ctx) + cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index); + else + cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index); + cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); + cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn); + cq_context->pd = cpu_to_be32(pdn); + cq_context->lkey = cpu_to_be32(cq->buf.mr.ibmr.lkey); + cq_context->cqn = cpu_to_be32(cq->cqn); + + if (mthca_is_memfree(dev)) { + cq_context->ci_db = cpu_to_be32(cq->set_ci_db_index); + cq_context->state_db = cpu_to_be32(cq->arm_db_index); + } + + err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn); + if (err) { + mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err); + goto err_out_free_mr; + } + + spin_lock_irq(&dev->cq_table.lock); + if (mthca_array_set(&dev->cq_table.cq, + cq->cqn & (dev->limits.num_cqs - 1), + cq)) { + spin_unlock_irq(&dev->cq_table.lock); + goto err_out_free_mr; + } + spin_unlock_irq(&dev->cq_table.lock); + + cq->cons_index = 0; + + mthca_free_mailbox(dev, mailbox); + + return 0; + +err_out_free_mr: + if (cq->is_kernel) + mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); + +err_out_mailbox: + mthca_free_mailbox(dev, mailbox); + +err_out_arm: + if (cq->is_kernel && mthca_is_memfree(dev)) + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); + +err_out_ci: + if (cq->is_kernel && mthca_is_memfree(dev)) + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); + +err_out_icm: + mthca_table_put(dev, dev->cq_table.table, cq->cqn); + +err_out: + mthca_free(&dev->cq_table.alloc, cq->cqn); + + return err; +} + +static inline int get_cq_refcount(struct mthca_dev *dev, struct mthca_cq *cq) +{ + int c; + + spin_lock_irq(&dev->cq_table.lock); + c = cq->refcount; + spin_unlock_irq(&dev->cq_table.lock); + + return c; +} + +void mthca_free_cq(struct mthca_dev *dev, + struct mthca_cq *cq) +{ + struct mthca_mailbox *mailbox; + int err; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + mthca_warn(dev, "No memory for mailbox to free CQ.\n"); + return; + } + + err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn); + if (err) + mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err); + + if (0) { + __be32 *ctx = mailbox->buf; + int j; + + printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n", + cq->cqn, cq->cons_index, + cq->is_kernel ? !!next_cqe_sw(cq) : 0); + for (j = 0; j < 16; ++j) + printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j])); + } + + spin_lock_irq(&dev->cq_table.lock); + mthca_array_clear(&dev->cq_table.cq, + cq->cqn & (dev->limits.num_cqs - 1)); + --cq->refcount; + spin_unlock_irq(&dev->cq_table.lock); + + if (dev->mthca_flags & MTHCA_FLAG_MSI_X) + synchronize_irq(dev->eq_table.eq[MTHCA_EQ_COMP].msi_x_vector); + else + synchronize_irq(dev->pdev->irq); + + wait_event(cq->wait, !get_cq_refcount(dev, cq)); + + if (cq->is_kernel) { + mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); + if (mthca_is_memfree(dev)) { + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); + } + } + + mthca_table_put(dev, dev->cq_table.table, cq->cqn); + mthca_free(&dev->cq_table.alloc, cq->cqn); + mthca_free_mailbox(dev, mailbox); +} + +int mthca_init_cq_table(struct mthca_dev *dev) +{ + int err; + + spin_lock_init(&dev->cq_table.lock); + + err = mthca_alloc_init(&dev->cq_table.alloc, + dev->limits.num_cqs, + (1 << 24) - 1, + dev->limits.reserved_cqs); + if (err) + return err; + + err = mthca_array_init(&dev->cq_table.cq, + dev->limits.num_cqs); + if (err) + mthca_alloc_cleanup(&dev->cq_table.alloc); + + return err; +} + +void mthca_cleanup_cq_table(struct mthca_dev *dev) +{ + mthca_array_cleanup(&dev->cq_table.cq, dev->limits.num_cqs); + mthca_alloc_cleanup(&dev->cq_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h new file mode 100644 index 00000000..7e6a6d64 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -0,0 +1,596 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MTHCA_DEV_H +#define MTHCA_DEV_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mthca_provider.h" +#include "mthca_doorbell.h" + +#define DRV_NAME "ib_mthca" +#define PFX DRV_NAME ": " +#define DRV_VERSION "1.0" +#define DRV_RELDATE "April 4, 2008" + +enum { + MTHCA_FLAG_DDR_HIDDEN = 1 << 1, + MTHCA_FLAG_SRQ = 1 << 2, + MTHCA_FLAG_MSI_X = 1 << 3, + MTHCA_FLAG_NO_LAM = 1 << 4, + MTHCA_FLAG_FMR = 1 << 5, + MTHCA_FLAG_MEMFREE = 1 << 6, + MTHCA_FLAG_PCIE = 1 << 7, + MTHCA_FLAG_SINAI_OPT = 1 << 8 +}; + +enum { + MTHCA_MAX_PORTS = 2 +}; + +enum { + MTHCA_BOARD_ID_LEN = 64 +}; + +enum { + MTHCA_EQ_CONTEXT_SIZE = 0x40, + MTHCA_CQ_CONTEXT_SIZE = 0x40, + MTHCA_QP_CONTEXT_SIZE = 0x200, + MTHCA_RDB_ENTRY_SIZE = 0x20, + MTHCA_AV_SIZE = 0x20, + MTHCA_MGM_ENTRY_SIZE = 0x100, + + /* Arbel FW gives us these, but we need them for Tavor */ + MTHCA_MPT_ENTRY_SIZE = 0x40, + MTHCA_MTT_SEG_SIZE = 0x40, + + MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) +}; + +enum { + MTHCA_EQ_CMD, + MTHCA_EQ_ASYNC, + MTHCA_EQ_COMP, + MTHCA_NUM_EQ +}; + +enum { + MTHCA_OPCODE_NOP = 0x00, + MTHCA_OPCODE_RDMA_WRITE = 0x08, + MTHCA_OPCODE_RDMA_WRITE_IMM = 0x09, + MTHCA_OPCODE_SEND = 0x0a, + MTHCA_OPCODE_SEND_IMM = 0x0b, + MTHCA_OPCODE_RDMA_READ = 0x10, + MTHCA_OPCODE_ATOMIC_CS = 0x11, + MTHCA_OPCODE_ATOMIC_FA = 0x12, + MTHCA_OPCODE_BIND_MW = 0x18, + MTHCA_OPCODE_INVALID = 0xff +}; + +enum { + MTHCA_CMD_USE_EVENTS = 1 << 0, + MTHCA_CMD_POST_DOORBELLS = 1 << 1 +}; + +enum { + MTHCA_CMD_NUM_DBELL_DWORDS = 8 +}; + +struct mthca_cmd { + struct pci_pool *pool; + struct mutex hcr_mutex; + struct semaphore poll_sem; + struct semaphore event_sem; + int max_cmds; + spinlock_t context_lock; + int free_head; + struct mthca_cmd_context *context; + u16 token_mask; + u32 flags; + void __iomem *dbell_map; + u16 dbell_offsets[MTHCA_CMD_NUM_DBELL_DWORDS]; +}; + +struct mthca_limits { + int num_ports; + int vl_cap; + int mtu_cap; + int gid_table_len; + int pkey_table_len; + int local_ca_ack_delay; + int num_uars; + int max_sg; + int num_qps; + int max_wqes; + int max_desc_sz; + int max_qp_init_rdma; + int reserved_qps; + int num_srqs; + int max_srq_wqes; + int max_srq_sge; + int reserved_srqs; + int num_eecs; + int reserved_eecs; + int num_cqs; + int max_cqes; + int reserved_cqs; + int num_eqs; + int reserved_eqs; + int num_mpts; + int num_mtt_segs; + int mtt_seg_size; + int fmr_reserved_mtts; + int reserved_mtts; + int reserved_mrws; + int reserved_uars; + int num_mgms; + int num_amgms; + int reserved_mcgs; + int num_pds; + int reserved_pds; + u32 page_size_cap; + u32 flags; + u16 stat_rate_support; + u8 port_width_cap; +}; + +struct mthca_alloc { + u32 last; + u32 top; + u32 max; + u32 mask; + spinlock_t lock; + unsigned long *table; +}; + +struct mthca_array { + struct { + void **page; + int used; + } *page_list; +}; + +struct mthca_uar_table { + struct mthca_alloc alloc; + u64 uarc_base; + int uarc_size; +}; + +struct mthca_pd_table { + struct mthca_alloc alloc; +}; + +struct mthca_buddy { + unsigned long **bits; + int *num_free; + int max_order; + spinlock_t lock; +}; + +struct mthca_mr_table { + struct mthca_alloc mpt_alloc; + struct mthca_buddy mtt_buddy; + struct mthca_buddy *fmr_mtt_buddy; + u64 mtt_base; + u64 mpt_base; + struct mthca_icm_table *mtt_table; + struct mthca_icm_table *mpt_table; + struct { + void __iomem *mpt_base; + void __iomem *mtt_base; + struct mthca_buddy mtt_buddy; + } tavor_fmr; +}; + +struct mthca_eq_table { + struct mthca_alloc alloc; + void __iomem *clr_int; + u32 clr_mask; + u32 arm_mask; + struct mthca_eq eq[MTHCA_NUM_EQ]; + u64 icm_virt; + struct page *icm_page; + dma_addr_t icm_dma; + int have_irq; + u8 inta_pin; +}; + +struct mthca_cq_table { + struct mthca_alloc alloc; + spinlock_t lock; + struct mthca_array cq; + struct mthca_icm_table *table; +}; + +struct mthca_srq_table { + struct mthca_alloc alloc; + spinlock_t lock; + struct mthca_array srq; + struct mthca_icm_table *table; +}; + +struct mthca_qp_table { + struct mthca_alloc alloc; + u32 rdb_base; + int rdb_shift; + int sqp_start; + spinlock_t lock; + struct mthca_array qp; + struct mthca_icm_table *qp_table; + struct mthca_icm_table *eqp_table; + struct mthca_icm_table *rdb_table; +}; + +struct mthca_av_table { + struct pci_pool *pool; + int num_ddr_avs; + u64 ddr_av_base; + void __iomem *av_map; + struct mthca_alloc alloc; +}; + +struct mthca_mcg_table { + struct mutex mutex; + struct mthca_alloc alloc; + struct mthca_icm_table *table; +}; + +struct mthca_catas_err { + u64 addr; + u32 __iomem *map; + u32 size; + struct timer_list timer; + struct list_head list; +}; + +extern struct mutex mthca_device_mutex; + +struct mthca_dev { + struct ib_device ib_dev; + struct pci_dev *pdev; + + int hca_type; + unsigned long mthca_flags; + unsigned long device_cap_flags; + + u32 rev_id; + char board_id[MTHCA_BOARD_ID_LEN]; + + /* firmware info */ + u64 fw_ver; + union { + struct { + u64 fw_start; + u64 fw_end; + } tavor; + struct { + u64 clr_int_base; + u64 eq_arm_base; + u64 eq_set_ci_base; + struct mthca_icm *fw_icm; + struct mthca_icm *aux_icm; + u16 fw_pages; + } arbel; + } fw; + + u64 ddr_start; + u64 ddr_end; + + MTHCA_DECLARE_DOORBELL_LOCK(doorbell_lock) + struct mutex cap_mask_mutex; + + void __iomem *hcr; + void __iomem *kar; + void __iomem *clr_base; + union { + struct { + void __iomem *ecr_base; + } tavor; + struct { + void __iomem *eq_arm; + void __iomem *eq_set_ci_base; + } arbel; + } eq_regs; + + struct mthca_cmd cmd; + struct mthca_limits limits; + + struct mthca_uar_table uar_table; + struct mthca_pd_table pd_table; + struct mthca_mr_table mr_table; + struct mthca_eq_table eq_table; + struct mthca_cq_table cq_table; + struct mthca_srq_table srq_table; + struct mthca_qp_table qp_table; + struct mthca_av_table av_table; + struct mthca_mcg_table mcg_table; + + struct mthca_catas_err catas_err; + + struct mthca_uar driver_uar; + struct mthca_db_table *db_tab; + struct mthca_pd driver_pd; + struct mthca_mr driver_mr; + + struct ib_mad_agent *send_agent[MTHCA_MAX_PORTS][2]; + struct ib_ah *sm_ah[MTHCA_MAX_PORTS]; + spinlock_t sm_lock; + u8 rate[MTHCA_MAX_PORTS]; + bool active; +}; + +#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG +extern int mthca_debug_level; + +#define mthca_dbg(mdev, format, arg...) \ + do { \ + if (mthca_debug_level) \ + dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ## arg); \ + } while (0) + +#else /* CONFIG_INFINIBAND_MTHCA_DEBUG */ + +#define mthca_dbg(mdev, format, arg...) do { (void) mdev; } while (0) + +#endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */ + +#define mthca_err(mdev, format, arg...) \ + dev_err(&mdev->pdev->dev, format, ## arg) +#define mthca_info(mdev, format, arg...) \ + dev_info(&mdev->pdev->dev, format, ## arg) +#define mthca_warn(mdev, format, arg...) \ + dev_warn(&mdev->pdev->dev, format, ## arg) + +extern void __buggy_use_of_MTHCA_GET(void); +extern void __buggy_use_of_MTHCA_PUT(void); + +#define MTHCA_GET(dest, source, offset) \ + do { \ + void *__p = (char *) (source) + (offset); \ + switch (sizeof (dest)) { \ + case 1: (dest) = *(u8 *) __p; break; \ + case 2: (dest) = be16_to_cpup(__p); break; \ + case 4: (dest) = be32_to_cpup(__p); break; \ + case 8: (dest) = be64_to_cpup(__p); break; \ + default: __buggy_use_of_MTHCA_GET(); \ + } \ + } while (0) + +#define MTHCA_PUT(dest, source, offset) \ + do { \ + void *__d = ((char *) (dest) + (offset)); \ + switch (sizeof(source)) { \ + case 1: *(u8 *) __d = (source); break; \ + case 2: *(__be16 *) __d = cpu_to_be16(source); break; \ + case 4: *(__be32 *) __d = cpu_to_be32(source); break; \ + case 8: *(__be64 *) __d = cpu_to_be64(source); break; \ + default: __buggy_use_of_MTHCA_PUT(); \ + } \ + } while (0) + +int mthca_reset(struct mthca_dev *mdev); + +u32 mthca_alloc(struct mthca_alloc *alloc); +void mthca_free(struct mthca_alloc *alloc, u32 obj); +int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask, + u32 reserved); +void mthca_alloc_cleanup(struct mthca_alloc *alloc); +void *mthca_array_get(struct mthca_array *array, int index); +int mthca_array_set(struct mthca_array *array, int index, void *value); +void mthca_array_clear(struct mthca_array *array, int index); +int mthca_array_init(struct mthca_array *array, int nent); +void mthca_array_cleanup(struct mthca_array *array, int nent); +int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, + union mthca_buf *buf, int *is_direct, struct mthca_pd *pd, + int hca_write, struct mthca_mr *mr); +void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf, + int is_direct, struct mthca_mr *mr); + +int mthca_init_uar_table(struct mthca_dev *dev); +int mthca_init_pd_table(struct mthca_dev *dev); +int mthca_init_mr_table(struct mthca_dev *dev); +int mthca_init_eq_table(struct mthca_dev *dev); +int mthca_init_cq_table(struct mthca_dev *dev); +int mthca_init_srq_table(struct mthca_dev *dev); +int mthca_init_qp_table(struct mthca_dev *dev); +int mthca_init_av_table(struct mthca_dev *dev); +int mthca_init_mcg_table(struct mthca_dev *dev); + +void mthca_cleanup_uar_table(struct mthca_dev *dev); +void mthca_cleanup_pd_table(struct mthca_dev *dev); +void mthca_cleanup_mr_table(struct mthca_dev *dev); +void mthca_cleanup_eq_table(struct mthca_dev *dev); +void mthca_cleanup_cq_table(struct mthca_dev *dev); +void mthca_cleanup_srq_table(struct mthca_dev *dev); +void mthca_cleanup_qp_table(struct mthca_dev *dev); +void mthca_cleanup_av_table(struct mthca_dev *dev); +void mthca_cleanup_mcg_table(struct mthca_dev *dev); + +int mthca_register_device(struct mthca_dev *dev); +void mthca_unregister_device(struct mthca_dev *dev); + +void mthca_start_catas_poll(struct mthca_dev *dev); +void mthca_stop_catas_poll(struct mthca_dev *dev); +int __mthca_restart_one(struct pci_dev *pdev); +int mthca_catas_init(void); +void mthca_catas_cleanup(void); + +int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); +void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); + +int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd); +void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd); + +int mthca_write_mtt_size(struct mthca_dev *dev); + +struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size); +void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt); +int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, + int start_index, u64 *buffer_list, int list_len); +int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, + u64 iova, u64 total_size, u32 access, struct mthca_mr *mr); +int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, + u32 access, struct mthca_mr *mr); +int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, + u64 *buffer_list, int buffer_size_shift, + int list_len, u64 iova, u64 total_size, + u32 access, struct mthca_mr *mr); +void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr); + +int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, + u32 access, struct mthca_fmr *fmr); +int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int list_len, u64 iova); +void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr); +int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int list_len, u64 iova); +void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr); +int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr); + +int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt); +void mthca_unmap_eq_icm(struct mthca_dev *dev); + +int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, + struct ib_wc *entry); +int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); +int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); +int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_ucontext *ctx, u32 pdn, + struct mthca_cq *cq); +void mthca_free_cq(struct mthca_dev *dev, + struct mthca_cq *cq); +void mthca_cq_completion(struct mthca_dev *dev, u32 cqn); +void mthca_cq_event(struct mthca_dev *dev, u32 cqn, + enum ib_event_type event_type); +void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn, + struct mthca_srq *srq); +void mthca_cq_resize_copy_cqes(struct mthca_cq *cq); +int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent); +void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe); + +int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, + struct ib_srq_attr *attr, struct mthca_srq *srq); +void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); +int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); +int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); +int mthca_max_srq_sge(struct mthca_dev *dev); +void mthca_srq_event(struct mthca_dev *dev, u32 srqn, + enum ib_event_type event_type); +void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); +int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); + +void mthca_qp_event(struct mthca_dev *dev, u32 qpn, + enum ib_event_type event_type); +int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); +int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata); +int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, + int index, int *dbd, __be32 *new_wqe); +int mthca_alloc_qp(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_cq *send_cq, + struct mthca_cq *recv_cq, + enum ib_qp_type type, + enum ib_sig_type send_policy, + struct ib_qp_cap *cap, + struct mthca_qp *qp); +int mthca_alloc_sqp(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_cq *send_cq, + struct mthca_cq *recv_cq, + enum ib_sig_type send_policy, + struct ib_qp_cap *cap, + int qpn, + int port, + struct mthca_sqp *sqp); +void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp); +int mthca_create_ah(struct mthca_dev *dev, + struct mthca_pd *pd, + struct ib_ah_attr *ah_attr, + struct mthca_ah *ah); +int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah); +int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, + struct ib_ud_header *header); +int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr); +int mthca_ah_grh_present(struct mthca_ah *ah); +u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port); +enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port); + +int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); +int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); + +int mthca_process_mad(struct ib_device *ibdev, + int mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad); +int mthca_create_agents(struct mthca_dev *dev); +void mthca_free_agents(struct mthca_dev *dev); + +static inline struct mthca_dev *to_mdev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct mthca_dev, ib_dev); +} + +static inline int mthca_is_memfree(struct mthca_dev *dev) +{ + return dev->mthca_flags & MTHCA_FLAG_MEMFREE; +} + +#endif /* MTHCA_DEV_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h new file mode 100644 index 00000000..14f51ef9 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#define MTHCA_RD_DOORBELL 0x00 +#define MTHCA_SEND_DOORBELL 0x10 +#define MTHCA_RECEIVE_DOORBELL 0x18 +#define MTHCA_CQ_DOORBELL 0x20 +#define MTHCA_EQ_DOORBELL 0x28 + +#if BITS_PER_LONG == 64 +/* + * Assume that we can just write a 64-bit doorbell atomically. s390 + * actually doesn't have writeq() but S/390 systems don't even have + * PCI so we won't worry about it. + */ + +#define MTHCA_DECLARE_DOORBELL_LOCK(name) +#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0) +#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL) + +static inline void mthca_write64_raw(__be64 val, void __iomem *dest) +{ + __raw_writeq((__force u64) val, dest); +} + +static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest, + spinlock_t *doorbell_lock) +{ + __raw_writeq((__force u64) cpu_to_be64((u64) hi << 32 | lo), dest); +} + +static inline void mthca_write_db_rec(__be32 val[2], __be32 *db) +{ + *(u64 *) db = *(u64 *) val; +} + +#else + +/* + * Just fall back to a spinlock to protect the doorbell if + * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit + * MMIO writes. + */ + +#define MTHCA_DECLARE_DOORBELL_LOCK(name) spinlock_t name; +#define MTHCA_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr) +#define MTHCA_GET_DOORBELL_LOCK(ptr) (ptr) + +static inline void mthca_write64_raw(__be64 val, void __iomem *dest) +{ + __raw_writel(((__force u32 *) &val)[0], dest); + __raw_writel(((__force u32 *) &val)[1], dest + 4); +} + +static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest, + spinlock_t *doorbell_lock) +{ + unsigned long flags; + + hi = (__force u32) cpu_to_be32(hi); + lo = (__force u32) cpu_to_be32(lo); + + spin_lock_irqsave(doorbell_lock, flags); + __raw_writel(hi, dest); + __raw_writel(lo, dest + 4); + spin_unlock_irqrestore(doorbell_lock, flags); +} + +static inline void mthca_write_db_rec(__be32 val[2], __be32 *db) +{ + db[0] = val[0]; + wmb(); + db[1] = val[1]; +} + +#endif diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c new file mode 100644 index 00000000..7c9d35f3 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -0,0 +1,905 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include + +#include "mthca_dev.h" +#include "mthca_cmd.h" +#include "mthca_config_reg.h" + +enum { + MTHCA_NUM_ASYNC_EQE = 0x80, + MTHCA_NUM_CMD_EQE = 0x80, + MTHCA_NUM_SPARE_EQE = 0x80, + MTHCA_EQ_ENTRY_SIZE = 0x20 +}; + +/* + * Must be packed because start is 64 bits but only aligned to 32 bits. + */ +struct mthca_eq_context { + __be32 flags; + __be64 start; + __be32 logsize_usrpage; + __be32 tavor_pd; /* reserved for Arbel */ + u8 reserved1[3]; + u8 intr; + __be32 arbel_pd; /* lost_count for Tavor */ + __be32 lkey; + u32 reserved2[2]; + __be32 consumer_index; + __be32 producer_index; + u32 reserved3[4]; +} __attribute__((packed)); + +#define MTHCA_EQ_STATUS_OK ( 0 << 28) +#define MTHCA_EQ_STATUS_OVERFLOW ( 9 << 28) +#define MTHCA_EQ_STATUS_WRITE_FAIL (10 << 28) +#define MTHCA_EQ_OWNER_SW ( 0 << 24) +#define MTHCA_EQ_OWNER_HW ( 1 << 24) +#define MTHCA_EQ_FLAG_TR ( 1 << 18) +#define MTHCA_EQ_FLAG_OI ( 1 << 17) +#define MTHCA_EQ_STATE_ARMED ( 1 << 8) +#define MTHCA_EQ_STATE_FIRED ( 2 << 8) +#define MTHCA_EQ_STATE_ALWAYS_ARMED ( 3 << 8) +#define MTHCA_EQ_STATE_ARBEL ( 8 << 8) + +enum { + MTHCA_EVENT_TYPE_COMP = 0x00, + MTHCA_EVENT_TYPE_PATH_MIG = 0x01, + MTHCA_EVENT_TYPE_COMM_EST = 0x02, + MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03, + MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13, + MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14, + MTHCA_EVENT_TYPE_CQ_ERROR = 0x04, + MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, + MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, + MTHCA_EVENT_TYPE_PATH_MIG_FAILED = 0x07, + MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, + MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, + MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, + MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08, + MTHCA_EVENT_TYPE_PORT_CHANGE = 0x09, + MTHCA_EVENT_TYPE_EQ_OVERFLOW = 0x0f, + MTHCA_EVENT_TYPE_ECC_DETECT = 0x0e, + MTHCA_EVENT_TYPE_CMD = 0x0a +}; + +#define MTHCA_ASYNC_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_PATH_MIG) | \ + (1ULL << MTHCA_EVENT_TYPE_COMM_EST) | \ + (1ULL << MTHCA_EVENT_TYPE_SQ_DRAINED) | \ + (1ULL << MTHCA_EVENT_TYPE_CQ_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_WQ_CATAS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_EEC_CATAS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_PATH_MIG_FAILED) | \ + (1ULL << MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \ + (1ULL << MTHCA_EVENT_TYPE_ECC_DETECT)) +#define MTHCA_SRQ_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ + (1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT)) +#define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD) + +#define MTHCA_EQ_DB_INC_CI (1 << 24) +#define MTHCA_EQ_DB_REQ_NOT (2 << 24) +#define MTHCA_EQ_DB_DISARM_CQ (3 << 24) +#define MTHCA_EQ_DB_SET_CI (4 << 24) +#define MTHCA_EQ_DB_ALWAYS_ARM (5 << 24) + +struct mthca_eqe { + u8 reserved1; + u8 type; + u8 reserved2; + u8 subtype; + union { + u32 raw[6]; + struct { + __be32 cqn; + } __attribute__((packed)) comp; + struct { + u16 reserved1; + __be16 token; + u32 reserved2; + u8 reserved3[3]; + u8 status; + __be64 out_param; + } __attribute__((packed)) cmd; + struct { + __be32 qpn; + } __attribute__((packed)) qp; + struct { + __be32 srqn; + } __attribute__((packed)) srq; + struct { + __be32 cqn; + u32 reserved1; + u8 reserved2[3]; + u8 syndrome; + } __attribute__((packed)) cq_err; + struct { + u32 reserved1[2]; + __be32 port; + } __attribute__((packed)) port_change; + } event; + u8 reserved3[3]; + u8 owner; +} __attribute__((packed)); + +#define MTHCA_EQ_ENTRY_OWNER_SW (0 << 7) +#define MTHCA_EQ_ENTRY_OWNER_HW (1 << 7) + +static inline u64 async_mask(struct mthca_dev *dev) +{ + return dev->mthca_flags & MTHCA_FLAG_SRQ ? + MTHCA_ASYNC_EVENT_MASK | MTHCA_SRQ_EVENT_MASK : + MTHCA_ASYNC_EVENT_MASK; +} + +static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) +{ + /* + * This barrier makes sure that all updates to ownership bits + * done by set_eqe_hw() hit memory before the consumer index + * is updated. set_eq_ci() allows the HCA to possibly write + * more EQ entries, and we want to avoid the exceedingly + * unlikely possibility of the HCA writing an entry and then + * having set_eqe_hw() overwrite the owner field. + */ + wmb(); + mthca_write64(MTHCA_EQ_DB_SET_CI | eq->eqn, ci & (eq->nent - 1), + dev->kar + MTHCA_EQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); +} + +static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) +{ + /* See comment in tavor_set_eq_ci() above. */ + wmb(); + __raw_writel((__force u32) cpu_to_be32(ci), + dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} + +static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) +{ + if (mthca_is_memfree(dev)) + arbel_set_eq_ci(dev, eq, ci); + else + tavor_set_eq_ci(dev, eq, ci); +} + +static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn) +{ + mthca_write64(MTHCA_EQ_DB_REQ_NOT | eqn, 0, + dev->kar + MTHCA_EQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); +} + +static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask) +{ + writel(eqn_mask, dev->eq_regs.arbel.eq_arm); +} + +static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn) +{ + if (!mthca_is_memfree(dev)) { + mthca_write64(MTHCA_EQ_DB_DISARM_CQ | eqn, cqn, + dev->kar + MTHCA_EQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } +} + +static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry) +{ + unsigned long off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE; + return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE; +} + +static inline struct mthca_eqe *next_eqe_sw(struct mthca_eq *eq) +{ + struct mthca_eqe *eqe; + eqe = get_eqe(eq, eq->cons_index); + return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe; +} + +static inline void set_eqe_hw(struct mthca_eqe *eqe) +{ + eqe->owner = MTHCA_EQ_ENTRY_OWNER_HW; +} + +static void port_change(struct mthca_dev *dev, int port, int active) +{ + struct ib_event record; + + mthca_dbg(dev, "Port change to %s for port %d\n", + active ? "active" : "down", port); + + record.device = &dev->ib_dev; + record.event = active ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + record.element.port_num = port; + + ib_dispatch_event(&record); +} + +static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) +{ + struct mthca_eqe *eqe; + int disarm_cqn; + int eqes_found = 0; + int set_ci = 0; + + while ((eqe = next_eqe_sw(eq))) { + /* + * Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + rmb(); + + switch (eqe->type) { + case MTHCA_EVENT_TYPE_COMP: + disarm_cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff; + disarm_cq(dev, eq->eqn, disarm_cqn); + mthca_cq_completion(dev, disarm_cqn); + break; + + case MTHCA_EVENT_TYPE_PATH_MIG: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_PATH_MIG); + break; + + case MTHCA_EVENT_TYPE_COMM_EST: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_COMM_EST); + break; + + case MTHCA_EVENT_TYPE_SQ_DRAINED: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_SQ_DRAINED); + break; + + case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_LAST_WQE_REACHED); + break; + + case MTHCA_EVENT_TYPE_SRQ_LIMIT: + mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff, + IB_EVENT_SRQ_LIMIT_REACHED); + break; + + case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_FATAL); + break; + + case MTHCA_EVENT_TYPE_PATH_MIG_FAILED: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_PATH_MIG_ERR); + break; + + case MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_REQ_ERR); + break; + + case MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_ACCESS_ERR); + break; + + case MTHCA_EVENT_TYPE_CMD: + mthca_cmd_event(dev, + be16_to_cpu(eqe->event.cmd.token), + eqe->event.cmd.status, + be64_to_cpu(eqe->event.cmd.out_param)); + break; + + case MTHCA_EVENT_TYPE_PORT_CHANGE: + port_change(dev, + (be32_to_cpu(eqe->event.port_change.port) >> 28) & 3, + eqe->subtype == 0x4); + break; + + case MTHCA_EVENT_TYPE_CQ_ERROR: + mthca_warn(dev, "CQ %s on CQN %06x\n", + eqe->event.cq_err.syndrome == 1 ? + "overrun" : "access violation", + be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff); + mthca_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn), + IB_EVENT_CQ_ERR); + break; + + case MTHCA_EVENT_TYPE_EQ_OVERFLOW: + mthca_warn(dev, "EQ overrun on EQN %d\n", eq->eqn); + break; + + case MTHCA_EVENT_TYPE_EEC_CATAS_ERROR: + case MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR: + case MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR: + case MTHCA_EVENT_TYPE_ECC_DETECT: + default: + mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n", + eqe->type, eqe->subtype, eq->eqn); + break; + }; + + set_eqe_hw(eqe); + ++eq->cons_index; + eqes_found = 1; + ++set_ci; + + /* + * The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create our EQs with MTHCA_NUM_SPARE_EQE extra + * entries, so we must update our consumer index at + * least that often. + */ + if (unlikely(set_ci >= MTHCA_NUM_SPARE_EQE)) { + /* + * Conditional on hca_type is OK here because + * this is a rare case, not the fast path. + */ + set_eq_ci(dev, eq, eq->cons_index); + set_ci = 0; + } + } + + /* + * Rely on caller to set consumer index so that we don't have + * to test hca_type in our interrupt handling fast path. + */ + return eqes_found; +} + +static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr) +{ + struct mthca_dev *dev = dev_ptr; + u32 ecr; + int i; + + if (dev->eq_table.clr_mask) + writel(dev->eq_table.clr_mask, dev->eq_table.clr_int); + + ecr = readl(dev->eq_regs.tavor.ecr_base + 4); + if (!ecr) + return IRQ_NONE; + + writel(ecr, dev->eq_regs.tavor.ecr_base + + MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) + if (ecr & dev->eq_table.eq[i].eqn_mask) { + if (mthca_eq_int(dev, &dev->eq_table.eq[i])) + tavor_set_eq_ci(dev, &dev->eq_table.eq[i], + dev->eq_table.eq[i].cons_index); + tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn); + } + + return IRQ_HANDLED; +} + +static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr) +{ + struct mthca_eq *eq = eq_ptr; + struct mthca_dev *dev = eq->dev; + + mthca_eq_int(dev, eq); + tavor_set_eq_ci(dev, eq, eq->cons_index); + tavor_eq_req_not(dev, eq->eqn); + + /* MSI-X vectors always belong to us */ + return IRQ_HANDLED; +} + +static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr) +{ + struct mthca_dev *dev = dev_ptr; + int work = 0; + int i; + + if (dev->eq_table.clr_mask) + writel(dev->eq_table.clr_mask, dev->eq_table.clr_int); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) + if (mthca_eq_int(dev, &dev->eq_table.eq[i])) { + work = 1; + arbel_set_eq_ci(dev, &dev->eq_table.eq[i], + dev->eq_table.eq[i].cons_index); + } + + arbel_eq_req_not(dev, dev->eq_table.arm_mask); + + return IRQ_RETVAL(work); +} + +static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr) +{ + struct mthca_eq *eq = eq_ptr; + struct mthca_dev *dev = eq->dev; + + mthca_eq_int(dev, eq); + arbel_set_eq_ci(dev, eq, eq->cons_index); + arbel_eq_req_not(dev, eq->eqn_mask); + + /* MSI-X vectors always belong to us */ + return IRQ_HANDLED; +} + +static int mthca_create_eq(struct mthca_dev *dev, + int nent, + u8 intr, + struct mthca_eq *eq) +{ + int npages; + u64 *dma_list = NULL; + dma_addr_t t; + struct mthca_mailbox *mailbox; + struct mthca_eq_context *eq_context; + int err = -ENOMEM; + int i; + + eq->dev = dev; + eq->nent = roundup_pow_of_two(max(nent, 2)); + npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE; + + eq->page_list = kmalloc(npages * sizeof *eq->page_list, + GFP_KERNEL); + if (!eq->page_list) + goto err_out; + + for (i = 0; i < npages; ++i) + eq->page_list[i].buf = NULL; + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + goto err_out_free; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + goto err_out_free; + eq_context = mailbox->buf; + + for (i = 0; i < npages; ++i) { + eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, + PAGE_SIZE, &t, GFP_KERNEL); + if (!eq->page_list[i].buf) + goto err_out_free_pages; + + dma_list[i] = t; + dma_unmap_addr_set(&eq->page_list[i], mapping, t); + + clear_page(eq->page_list[i].buf); + } + + for (i = 0; i < eq->nent; ++i) + set_eqe_hw(get_eqe(eq, i)); + + eq->eqn = mthca_alloc(&dev->eq_table.alloc); + if (eq->eqn == -1) + goto err_out_free_pages; + + err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num, + dma_list, PAGE_SHIFT, npages, + 0, npages * PAGE_SIZE, + MTHCA_MPT_FLAG_LOCAL_WRITE | + MTHCA_MPT_FLAG_LOCAL_READ, + &eq->mr); + if (err) + goto err_out_free_eq; + + memset(eq_context, 0, sizeof *eq_context); + eq_context->flags = cpu_to_be32(MTHCA_EQ_STATUS_OK | + MTHCA_EQ_OWNER_HW | + MTHCA_EQ_STATE_ARMED | + MTHCA_EQ_FLAG_TR); + if (mthca_is_memfree(dev)) + eq_context->flags |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL); + + eq_context->logsize_usrpage = cpu_to_be32((ffs(eq->nent) - 1) << 24); + if (mthca_is_memfree(dev)) { + eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num); + } else { + eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index); + eq_context->tavor_pd = cpu_to_be32(dev->driver_pd.pd_num); + } + eq_context->intr = intr; + eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey); + + err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn); + if (err) { + mthca_warn(dev, "SW2HW_EQ returned %d\n", err); + goto err_out_free_mr; + } + + kfree(dma_list); + mthca_free_mailbox(dev, mailbox); + + eq->eqn_mask = swab32(1 << eq->eqn); + eq->cons_index = 0; + + dev->eq_table.arm_mask |= eq->eqn_mask; + + mthca_dbg(dev, "Allocated EQ %d with %d entries\n", + eq->eqn, eq->nent); + + return err; + + err_out_free_mr: + mthca_free_mr(dev, &eq->mr); + + err_out_free_eq: + mthca_free(&dev->eq_table.alloc, eq->eqn); + + err_out_free_pages: + for (i = 0; i < npages; ++i) + if (eq->page_list[i].buf) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + eq->page_list[i].buf, + dma_unmap_addr(&eq->page_list[i], + mapping)); + + mthca_free_mailbox(dev, mailbox); + + err_out_free: + kfree(eq->page_list); + kfree(dma_list); + + err_out: + return err; +} + +static void mthca_free_eq(struct mthca_dev *dev, + struct mthca_eq *eq) +{ + struct mthca_mailbox *mailbox; + int err; + int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) / + PAGE_SIZE; + int i; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return; + + err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn); + if (err) + mthca_warn(dev, "HW2SW_EQ returned %d\n", err); + + dev->eq_table.arm_mask &= ~eq->eqn_mask; + + if (0) { + mthca_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn); + for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) { + if (i % 4 == 0) + printk("[%02x] ", i * 4); + printk(" %08x", be32_to_cpup(mailbox->buf + i * 4)); + if ((i + 1) % 4 == 0) + printk("\n"); + } + } + + mthca_free_mr(dev, &eq->mr); + for (i = 0; i < npages; ++i) + pci_free_consistent(dev->pdev, PAGE_SIZE, + eq->page_list[i].buf, + dma_unmap_addr(&eq->page_list[i], mapping)); + + kfree(eq->page_list); + mthca_free_mailbox(dev, mailbox); +} + +static void mthca_free_irqs(struct mthca_dev *dev) +{ + int i; + + if (dev->eq_table.have_irq) + free_irq(dev->pdev->irq, dev); + for (i = 0; i < MTHCA_NUM_EQ; ++i) + if (dev->eq_table.eq[i].have_irq) { + free_irq(dev->eq_table.eq[i].msi_x_vector, + dev->eq_table.eq + i); + dev->eq_table.eq[i].have_irq = 0; + } +} + +static int mthca_map_reg(struct mthca_dev *dev, + unsigned long offset, unsigned long size, + void __iomem **map) +{ + phys_addr_t base = pci_resource_start(dev->pdev, 0); + + *map = ioremap(base + offset, size); + if (!*map) + return -ENOMEM; + + return 0; +} + +static int mthca_map_eq_regs(struct mthca_dev *dev) +{ + if (mthca_is_memfree(dev)) { + /* + * We assume that the EQ arm and EQ set CI registers + * fall within the first BAR. We can't trust the + * values firmware gives us, since those addresses are + * valid on the HCA's side of the PCI bus but not + * necessarily the host side. + */ + if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & + dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, + &dev->clr_base)) { + mthca_err(dev, "Couldn't map interrupt clear register, " + "aborting.\n"); + return -ENOMEM; + } + + /* + * Add 4 because we limit ourselves to EQs 0 ... 31, + * so we only need the low word of the register. + */ + if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) & + dev->fw.arbel.eq_arm_base) + 4, 4, + &dev->eq_regs.arbel.eq_arm)) { + mthca_err(dev, "Couldn't map EQ arm register, aborting.\n"); + iounmap(dev->clr_base); + return -ENOMEM; + } + + if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & + dev->fw.arbel.eq_set_ci_base, + MTHCA_EQ_SET_CI_SIZE, + &dev->eq_regs.arbel.eq_set_ci_base)) { + mthca_err(dev, "Couldn't map EQ CI register, aborting.\n"); + iounmap(dev->eq_regs.arbel.eq_arm); + iounmap(dev->clr_base); + return -ENOMEM; + } + } else { + if (mthca_map_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, + &dev->clr_base)) { + mthca_err(dev, "Couldn't map interrupt clear register, " + "aborting.\n"); + return -ENOMEM; + } + + if (mthca_map_reg(dev, MTHCA_ECR_BASE, + MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE, + &dev->eq_regs.tavor.ecr_base)) { + mthca_err(dev, "Couldn't map ecr register, " + "aborting.\n"); + iounmap(dev->clr_base); + return -ENOMEM; + } + } + + return 0; + +} + +static void mthca_unmap_eq_regs(struct mthca_dev *dev) +{ + if (mthca_is_memfree(dev)) { + iounmap(dev->eq_regs.arbel.eq_set_ci_base); + iounmap(dev->eq_regs.arbel.eq_arm); + iounmap(dev->clr_base); + } else { + iounmap(dev->eq_regs.tavor.ecr_base); + iounmap(dev->clr_base); + } +} + +int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) +{ + int ret; + + /* + * We assume that mapping one page is enough for the whole EQ + * context table. This is fine with all current HCAs, because + * we only use 32 EQs and each EQ uses 32 bytes of context + * memory, or 1 KB total. + */ + dev->eq_table.icm_virt = icm_virt; + dev->eq_table.icm_page = alloc_page(GFP_HIGHUSER); + if (!dev->eq_table.icm_page) + return -ENOMEM; + dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) { + __free_page(dev->eq_table.icm_page); + return -ENOMEM; + } + + ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt); + if (ret) { + pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + __free_page(dev->eq_table.icm_page); + } + + return ret; +} + +void mthca_unmap_eq_icm(struct mthca_dev *dev) +{ + mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1); + pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + __free_page(dev->eq_table.icm_page); +} + +int mthca_init_eq_table(struct mthca_dev *dev) +{ + int err; + u8 intr; + int i; + + err = mthca_alloc_init(&dev->eq_table.alloc, + dev->limits.num_eqs, + dev->limits.num_eqs - 1, + dev->limits.reserved_eqs); + if (err) + return err; + + err = mthca_map_eq_regs(dev); + if (err) + goto err_out_free; + + if (dev->mthca_flags & MTHCA_FLAG_MSI_X) { + dev->eq_table.clr_mask = 0; + } else { + dev->eq_table.clr_mask = + swab32(1 << (dev->eq_table.inta_pin & 31)); + dev->eq_table.clr_int = dev->clr_base + + (dev->eq_table.inta_pin < 32 ? 4 : 0); + } + + dev->eq_table.arm_mask = 0; + + intr = dev->eq_table.inta_pin; + + err = mthca_create_eq(dev, dev->limits.num_cqs + MTHCA_NUM_SPARE_EQE, + (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr, + &dev->eq_table.eq[MTHCA_EQ_COMP]); + if (err) + goto err_out_unmap; + + err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE + MTHCA_NUM_SPARE_EQE, + (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr, + &dev->eq_table.eq[MTHCA_EQ_ASYNC]); + if (err) + goto err_out_comp; + + err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE + MTHCA_NUM_SPARE_EQE, + (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr, + &dev->eq_table.eq[MTHCA_EQ_CMD]); + if (err) + goto err_out_async; + + if (dev->mthca_flags & MTHCA_FLAG_MSI_X) { + static const char *eq_name[] = { + [MTHCA_EQ_COMP] = DRV_NAME "-comp", + [MTHCA_EQ_ASYNC] = DRV_NAME "-async", + [MTHCA_EQ_CMD] = DRV_NAME "-cmd" + }; + + for (i = 0; i < MTHCA_NUM_EQ; ++i) { + snprintf(dev->eq_table.eq[i].irq_name, + IB_DEVICE_NAME_MAX, + "%s@pci:%s", eq_name[i], + pci_name(dev->pdev)); + err = request_irq(dev->eq_table.eq[i].msi_x_vector, + mthca_is_memfree(dev) ? + mthca_arbel_msi_x_interrupt : + mthca_tavor_msi_x_interrupt, + 0, dev->eq_table.eq[i].irq_name, + dev->eq_table.eq + i); + if (err) + goto err_out_cmd; + dev->eq_table.eq[i].have_irq = 1; + } + } else { + snprintf(dev->eq_table.eq[0].irq_name, IB_DEVICE_NAME_MAX, + DRV_NAME "@pci:%s", pci_name(dev->pdev)); + err = request_irq(dev->pdev->irq, + mthca_is_memfree(dev) ? + mthca_arbel_interrupt : + mthca_tavor_interrupt, + IRQF_SHARED, dev->eq_table.eq[0].irq_name, dev); + if (err) + goto err_out_cmd; + dev->eq_table.have_irq = 1; + } + + err = mthca_MAP_EQ(dev, async_mask(dev), + 0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); + if (err) + mthca_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", + dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err); + + err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK, + 0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn); + if (err) + mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n", + dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) + if (mthca_is_memfree(dev)) + arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask); + else + tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn); + + return 0; + +err_out_cmd: + mthca_free_irqs(dev); + mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]); + +err_out_async: + mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]); + +err_out_comp: + mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_COMP]); + +err_out_unmap: + mthca_unmap_eq_regs(dev); + +err_out_free: + mthca_alloc_cleanup(&dev->eq_table.alloc); + return err; +} + +void mthca_cleanup_eq_table(struct mthca_dev *dev) +{ + int i; + + mthca_free_irqs(dev); + + mthca_MAP_EQ(dev, async_mask(dev), + 1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); + mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK, + 1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) + mthca_free_eq(dev, &dev->eq_table.eq[i]); + + mthca_unmap_eq_regs(dev); + + mthca_alloc_cleanup(&dev->eq_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c new file mode 100644 index 00000000..b6f7f457 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include +#include +#include + +#include "mthca_dev.h" +#include "mthca_cmd.h" + +enum { + MTHCA_VENDOR_CLASS1 = 0x9, + MTHCA_VENDOR_CLASS2 = 0xa +}; + +static int mthca_update_rate(struct mthca_dev *dev, u8 port_num) +{ + struct ib_port_attr *tprops = NULL; + int ret; + + tprops = kmalloc(sizeof *tprops, GFP_KERNEL); + if (!tprops) + return -ENOMEM; + + ret = ib_query_port(&dev->ib_dev, port_num, tprops); + if (ret) { + printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n", + ret, dev->ib_dev.name, port_num); + goto out; + } + + dev->rate[port_num - 1] = tprops->active_speed * + ib_width_enum_to_int(tprops->active_width); + +out: + kfree(tprops); + return ret; +} + +static void update_sm_ah(struct mthca_dev *dev, + u8 port_num, u16 lid, u8 sl) +{ + struct ib_ah *new_ah; + struct ib_ah_attr ah_attr; + unsigned long flags; + + if (!dev->send_agent[port_num - 1][0]) + return; + + memset(&ah_attr, 0, sizeof ah_attr); + ah_attr.dlid = lid; + ah_attr.sl = sl; + ah_attr.port_num = port_num; + + new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, + &ah_attr); + if (IS_ERR(new_ah)) + return; + + spin_lock_irqsave(&dev->sm_lock, flags); + if (dev->sm_ah[port_num - 1]) + ib_destroy_ah(dev->sm_ah[port_num - 1]); + dev->sm_ah[port_num - 1] = new_ah; + spin_unlock_irqrestore(&dev->sm_lock, flags); +} + +/* + * Snoop SM MADs for port info and P_Key table sets, so we can + * synthesize LID change and P_Key change events. + */ +static void smp_snoop(struct ib_device *ibdev, + u8 port_num, + struct ib_mad *mad, + u16 prev_lid) +{ + struct ib_event event; + + if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + mad->mad_hdr.method == IB_MGMT_METHOD_SET) { + if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { + struct ib_port_info *pinfo = + (struct ib_port_info *) ((struct ib_smp *) mad)->data; + u16 lid = be16_to_cpu(pinfo->lid); + + mthca_update_rate(to_mdev(ibdev), port_num); + update_sm_ah(to_mdev(ibdev), port_num, + be16_to_cpu(pinfo->sm_lid), + pinfo->neighbormtu_mastersmsl & 0xf); + + event.device = ibdev; + event.element.port_num = port_num; + + if (pinfo->clientrereg_resv_subnetto & 0x80) { + event.event = IB_EVENT_CLIENT_REREGISTER; + ib_dispatch_event(&event); + } + + if (prev_lid != lid) { + event.event = IB_EVENT_LID_CHANGE; + ib_dispatch_event(&event); + } + } + + if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) { + event.device = ibdev; + event.event = IB_EVENT_PKEY_CHANGE; + event.element.port_num = port_num; + ib_dispatch_event(&event); + } + } +} + +static void node_desc_override(struct ib_device *dev, + struct ib_mad *mad) +{ + if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && + mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { + mutex_lock(&to_mdev(dev)->cap_mask_mutex); + memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); + mutex_unlock(&to_mdev(dev)->cap_mask_mutex); + } +} + +static void forward_trap(struct mthca_dev *dev, + u8 port_num, + struct ib_mad *mad) +{ + int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; + struct ib_mad_send_buf *send_buf; + struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; + int ret; + unsigned long flags; + + if (agent) { + send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, + IB_MGMT_MAD_DATA, GFP_ATOMIC); + if (IS_ERR(send_buf)) + return; + /* + * We rely here on the fact that MLX QPs don't use the + * address handle after the send is posted (this is + * wrong following the IB spec strictly, but we know + * it's OK for our devices). + */ + spin_lock_irqsave(&dev->sm_lock, flags); + memcpy(send_buf->mad, mad, sizeof *mad); + if ((send_buf->ah = dev->sm_ah[port_num - 1])) + ret = ib_post_send_mad(send_buf, NULL); + else + ret = -EINVAL; + spin_unlock_irqrestore(&dev->sm_lock, flags); + + if (ret) + ib_free_send_mad(send_buf); + } +} + +int mthca_process_mad(struct ib_device *ibdev, + int mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + int err; + u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + u16 prev_lid = 0; + struct ib_port_attr pattr; + + /* Forward locally generated traps to the SM */ + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && + slid == 0) { + forward_trap(to_mdev(ibdev), port_num, in_mad); + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + } + + /* + * Only handle SM gets, sets and trap represses for SM class + * + * Only handle PMA and Mellanox vendor-specific class gets and + * sets for other classes. + */ + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) + return IB_MAD_RESULT_SUCCESS; + + /* + * Don't process SMInfo queries or vendor-specific + * MADs -- the SMA can't handle them. + */ + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || + ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == + IB_SMP_ATTR_VENDOR_MASK)) + return IB_MAD_RESULT_SUCCESS; + } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || + in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 || + in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) { + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) + return IB_MAD_RESULT_SUCCESS; + } else + return IB_MAD_RESULT_SUCCESS; + if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && + in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && + !ib_query_port(ibdev, port_num, &pattr)) + prev_lid = pattr.lid; + + err = mthca_MAD_IFC(to_mdev(ibdev), + mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, + port_num, in_wc, in_grh, in_mad, out_mad); + if (err == -EBADMSG) + return IB_MAD_RESULT_SUCCESS; + else if (err) { + mthca_err(to_mdev(ibdev), "MAD_IFC returned %d\n", err); + return IB_MAD_RESULT_FAILURE; + } + + if (!out_mad->mad_hdr.status) { + smp_snoop(ibdev, port_num, in_mad, prev_lid); + node_desc_override(ibdev, out_mad); + } + + /* set return bit in status of directed route responses */ + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) + /* no response for trap repress */ + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +static void send_handler(struct ib_mad_agent *agent, + struct ib_mad_send_wc *mad_send_wc) +{ + ib_free_send_mad(mad_send_wc->send_buf); +} + +int mthca_create_agents(struct mthca_dev *dev) +{ + struct ib_mad_agent *agent; + int p, q; + int ret; + + spin_lock_init(&dev->sm_lock); + + for (p = 0; p < dev->limits.num_ports; ++p) + for (q = 0; q <= 1; ++q) { + agent = ib_register_mad_agent(&dev->ib_dev, p + 1, + q ? IB_QPT_GSI : IB_QPT_SMI, + NULL, 0, send_handler, + NULL, NULL); + if (IS_ERR(agent)) { + ret = PTR_ERR(agent); + goto err; + } + dev->send_agent[p][q] = agent; + } + + + for (p = 1; p <= dev->limits.num_ports; ++p) { + ret = mthca_update_rate(dev, p); + if (ret) { + mthca_err(dev, "Failed to obtain port %d rate." + " aborting.\n", p); + goto err; + } + } + + return 0; + +err: + for (p = 0; p < dev->limits.num_ports; ++p) + for (q = 0; q <= 1; ++q) + if (dev->send_agent[p][q]) + ib_unregister_mad_agent(dev->send_agent[p][q]); + + return ret; +} + +void mthca_free_agents(struct mthca_dev *dev) +{ + struct ib_mad_agent *agent; + int p, q; + + for (p = 0; p < dev->limits.num_ports; ++p) { + for (q = 0; q <= 1; ++q) { + agent = dev->send_agent[p][q]; + dev->send_agent[p][q] = NULL; + ib_unregister_mad_agent(agent); + } + + if (dev->sm_ah[p]) + ib_destroy_ah(dev->sm_ah[p]); + } +} diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c new file mode 100644 index 00000000..aa12a533 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -0,0 +1,1280 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "mthca_dev.h" +#include "mthca_config_reg.h" +#include "mthca_cmd.h" +#include "mthca_profile.h" +#include "mthca_memfree.h" +#include "mthca_wqe.h" + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); + +#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG + +int mthca_debug_level = 0; +module_param_named(debug_level, mthca_debug_level, int, 0644); +MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); + +#endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */ + +#ifdef CONFIG_PCI_MSI + +static int msi_x = 1; +module_param(msi_x, int, 0444); +MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); + +#else /* CONFIG_PCI_MSI */ + +#define msi_x (0) + +#endif /* CONFIG_PCI_MSI */ + +static int tune_pci = 0; +module_param(tune_pci, int, 0444); +MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero"); + +DEFINE_MUTEX(mthca_device_mutex); + +#define MTHCA_DEFAULT_NUM_QP (1 << 16) +#define MTHCA_DEFAULT_RDB_PER_QP (1 << 2) +#define MTHCA_DEFAULT_NUM_CQ (1 << 16) +#define MTHCA_DEFAULT_NUM_MCG (1 << 13) +#define MTHCA_DEFAULT_NUM_MPT (1 << 17) +#define MTHCA_DEFAULT_NUM_MTT (1 << 20) +#define MTHCA_DEFAULT_NUM_UDAV (1 << 15) +#define MTHCA_DEFAULT_NUM_RESERVED_MTTS (1 << 18) +#define MTHCA_DEFAULT_NUM_UARC_SIZE (1 << 18) + +static struct mthca_profile hca_profile = { + .num_qp = MTHCA_DEFAULT_NUM_QP, + .rdb_per_qp = MTHCA_DEFAULT_RDB_PER_QP, + .num_cq = MTHCA_DEFAULT_NUM_CQ, + .num_mcg = MTHCA_DEFAULT_NUM_MCG, + .num_mpt = MTHCA_DEFAULT_NUM_MPT, + .num_mtt = MTHCA_DEFAULT_NUM_MTT, + .num_udav = MTHCA_DEFAULT_NUM_UDAV, /* Tavor only */ + .fmr_reserved_mtts = MTHCA_DEFAULT_NUM_RESERVED_MTTS, /* Tavor only */ + .uarc_size = MTHCA_DEFAULT_NUM_UARC_SIZE, /* Arbel only */ +}; + +module_param_named(num_qp, hca_profile.num_qp, int, 0444); +MODULE_PARM_DESC(num_qp, "maximum number of QPs per HCA"); + +module_param_named(rdb_per_qp, hca_profile.rdb_per_qp, int, 0444); +MODULE_PARM_DESC(rdb_per_qp, "number of RDB buffers per QP"); + +module_param_named(num_cq, hca_profile.num_cq, int, 0444); +MODULE_PARM_DESC(num_cq, "maximum number of CQs per HCA"); + +module_param_named(num_mcg, hca_profile.num_mcg, int, 0444); +MODULE_PARM_DESC(num_mcg, "maximum number of multicast groups per HCA"); + +module_param_named(num_mpt, hca_profile.num_mpt, int, 0444); +MODULE_PARM_DESC(num_mpt, + "maximum number of memory protection table entries per HCA"); + +module_param_named(num_mtt, hca_profile.num_mtt, int, 0444); +MODULE_PARM_DESC(num_mtt, + "maximum number of memory translation table segments per HCA"); + +module_param_named(num_udav, hca_profile.num_udav, int, 0444); +MODULE_PARM_DESC(num_udav, "maximum number of UD address vectors per HCA"); + +module_param_named(fmr_reserved_mtts, hca_profile.fmr_reserved_mtts, int, 0444); +MODULE_PARM_DESC(fmr_reserved_mtts, + "number of memory translation table segments reserved for FMR"); + +static int log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8); +module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); +MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)"); + +static char mthca_version[] __devinitdata = + DRV_NAME ": Mellanox InfiniBand HCA driver v" + DRV_VERSION " (" DRV_RELDATE ")\n"; + +static int mthca_tune_pci(struct mthca_dev *mdev) +{ + if (!tune_pci) + return 0; + + /* First try to max out Read Byte Count */ + if (pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX)) { + if (pcix_set_mmrbc(mdev->pdev, pcix_get_max_mmrbc(mdev->pdev))) { + mthca_err(mdev, "Couldn't set PCI-X max read count, " + "aborting.\n"); + return -ENODEV; + } + } else if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE)) + mthca_info(mdev, "No PCI-X capability, not setting RBC.\n"); + + if (pci_is_pcie(mdev->pdev)) { + if (pcie_set_readrq(mdev->pdev, 4096)) { + mthca_err(mdev, "Couldn't write PCI Express read request, " + "aborting.\n"); + return -ENODEV; + } + } else if (mdev->mthca_flags & MTHCA_FLAG_PCIE) + mthca_info(mdev, "No PCI Express capability, " + "not setting Max Read Request Size.\n"); + + return 0; +} + +static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) +{ + int err; + + mdev->limits.mtt_seg_size = (1 << log_mtts_per_seg) * 8; + err = mthca_QUERY_DEV_LIM(mdev, dev_lim); + if (err) { + mthca_err(mdev, "QUERY_DEV_LIM command returned %d" + ", aborting.\n", err); + return err; + } + if (dev_lim->min_page_sz > PAGE_SIZE) { + mthca_err(mdev, "HCA minimum page size of %d bigger than " + "kernel PAGE_SIZE of %ld, aborting.\n", + dev_lim->min_page_sz, PAGE_SIZE); + return -ENODEV; + } + if (dev_lim->num_ports > MTHCA_MAX_PORTS) { + mthca_err(mdev, "HCA has %d ports, but we only support %d, " + "aborting.\n", + dev_lim->num_ports, MTHCA_MAX_PORTS); + return -ENODEV; + } + + if (dev_lim->uar_size > pci_resource_len(mdev->pdev, 2)) { + mthca_err(mdev, "HCA reported UAR size of 0x%x bigger than " + "PCI resource 2 size of 0x%llx, aborting.\n", + dev_lim->uar_size, + (unsigned long long)pci_resource_len(mdev->pdev, 2)); + return -ENODEV; + } + + mdev->limits.num_ports = dev_lim->num_ports; + mdev->limits.vl_cap = dev_lim->max_vl; + mdev->limits.mtu_cap = dev_lim->max_mtu; + mdev->limits.gid_table_len = dev_lim->max_gids; + mdev->limits.pkey_table_len = dev_lim->max_pkeys; + mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay; + /* + * Need to allow for worst case send WQE overhead and check + * whether max_desc_sz imposes a lower limit than max_sg; UD + * send has the biggest overhead. + */ + mdev->limits.max_sg = min_t(int, dev_lim->max_sg, + (dev_lim->max_desc_sz - + sizeof (struct mthca_next_seg) - + (mthca_is_memfree(mdev) ? + sizeof (struct mthca_arbel_ud_seg) : + sizeof (struct mthca_tavor_ud_seg))) / + sizeof (struct mthca_data_seg)); + mdev->limits.max_wqes = dev_lim->max_qp_sz; + mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp; + mdev->limits.reserved_qps = dev_lim->reserved_qps; + mdev->limits.max_srq_wqes = dev_lim->max_srq_sz; + mdev->limits.reserved_srqs = dev_lim->reserved_srqs; + mdev->limits.reserved_eecs = dev_lim->reserved_eecs; + mdev->limits.max_desc_sz = dev_lim->max_desc_sz; + mdev->limits.max_srq_sge = mthca_max_srq_sge(mdev); + /* + * Subtract 1 from the limit because we need to allocate a + * spare CQE so the HCA HW can tell the difference between an + * empty CQ and a full CQ. + */ + mdev->limits.max_cqes = dev_lim->max_cq_sz - 1; + mdev->limits.reserved_cqs = dev_lim->reserved_cqs; + mdev->limits.reserved_eqs = dev_lim->reserved_eqs; + mdev->limits.reserved_mtts = dev_lim->reserved_mtts; + mdev->limits.reserved_mrws = dev_lim->reserved_mrws; + mdev->limits.reserved_uars = dev_lim->reserved_uars; + mdev->limits.reserved_pds = dev_lim->reserved_pds; + mdev->limits.port_width_cap = dev_lim->max_port_width; + mdev->limits.page_size_cap = ~(u32) (dev_lim->min_page_sz - 1); + mdev->limits.flags = dev_lim->flags; + /* + * For old FW that doesn't return static rate support, use a + * value of 0x3 (only static rate values of 0 or 1 are handled), + * except on Sinai, where even old FW can handle static rate + * values of 2 and 3. + */ + if (dev_lim->stat_rate_support) + mdev->limits.stat_rate_support = dev_lim->stat_rate_support; + else if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + mdev->limits.stat_rate_support = 0xf; + else + mdev->limits.stat_rate_support = 0x3; + + /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. + May be doable since hardware supports it for SRQ. + + IB_DEVICE_N_NOTIFY_CQ is supported by hardware but not by driver. + + IB_DEVICE_SRQ_RESIZE is supported by hardware but SRQ is not + supported by driver. */ + mdev->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | + IB_DEVICE_PORT_ACTIVE_EVENT | + IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_RC_RNR_NAK_GEN; + + if (dev_lim->flags & DEV_LIM_FLAG_BAD_PKEY_CNTR) + mdev->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; + + if (dev_lim->flags & DEV_LIM_FLAG_BAD_QKEY_CNTR) + mdev->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; + + if (dev_lim->flags & DEV_LIM_FLAG_RAW_MULTI) + mdev->device_cap_flags |= IB_DEVICE_RAW_MULTI; + + if (dev_lim->flags & DEV_LIM_FLAG_AUTO_PATH_MIG) + mdev->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; + + if (dev_lim->flags & DEV_LIM_FLAG_UD_AV_PORT_ENFORCE) + mdev->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; + + if (dev_lim->flags & DEV_LIM_FLAG_SRQ) + mdev->mthca_flags |= MTHCA_FLAG_SRQ; + + if (mthca_is_memfree(mdev)) + if (dev_lim->flags & DEV_LIM_FLAG_IPOIB_CSUM) + mdev->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; + + return 0; +} + +static int mthca_init_tavor(struct mthca_dev *mdev) +{ + s64 size; + int err; + struct mthca_dev_lim dev_lim; + struct mthca_profile profile; + struct mthca_init_hca_param init_hca; + + err = mthca_SYS_EN(mdev); + if (err) { + mthca_err(mdev, "SYS_EN command returned %d, aborting.\n", err); + return err; + } + + err = mthca_QUERY_FW(mdev); + if (err) { + mthca_err(mdev, "QUERY_FW command returned %d," + " aborting.\n", err); + goto err_disable; + } + err = mthca_QUERY_DDR(mdev); + if (err) { + mthca_err(mdev, "QUERY_DDR command returned %d, aborting.\n", err); + goto err_disable; + } + + err = mthca_dev_lim(mdev, &dev_lim); + if (err) { + mthca_err(mdev, "QUERY_DEV_LIM command returned %d, aborting.\n", err); + goto err_disable; + } + + profile = hca_profile; + profile.num_uar = dev_lim.uar_size / PAGE_SIZE; + profile.uarc_size = 0; + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + profile.num_srq = dev_lim.max_srqs; + + size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca); + if (size < 0) { + err = size; + goto err_disable; + } + + err = mthca_INIT_HCA(mdev, &init_hca); + if (err) { + mthca_err(mdev, "INIT_HCA command returned %d, aborting.\n", err); + goto err_disable; + } + + return 0; + +err_disable: + mthca_SYS_DIS(mdev); + + return err; +} + +static int mthca_load_fw(struct mthca_dev *mdev) +{ + int err; + + /* FIXME: use HCA-attached memory for FW if present */ + + mdev->fw.arbel.fw_icm = + mthca_alloc_icm(mdev, mdev->fw.arbel.fw_pages, + GFP_HIGHUSER | __GFP_NOWARN, 0); + if (!mdev->fw.arbel.fw_icm) { + mthca_err(mdev, "Couldn't allocate FW area, aborting.\n"); + return -ENOMEM; + } + + err = mthca_MAP_FA(mdev, mdev->fw.arbel.fw_icm); + if (err) { + mthca_err(mdev, "MAP_FA command returned %d, aborting.\n", err); + goto err_free; + } + err = mthca_RUN_FW(mdev); + if (err) { + mthca_err(mdev, "RUN_FW command returned %d, aborting.\n", err); + goto err_unmap_fa; + } + + return 0; + +err_unmap_fa: + mthca_UNMAP_FA(mdev); + +err_free: + mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0); + return err; +} + +static int mthca_init_icm(struct mthca_dev *mdev, + struct mthca_dev_lim *dev_lim, + struct mthca_init_hca_param *init_hca, + u64 icm_size) +{ + u64 aux_pages; + int err; + + err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages); + if (err) { + mthca_err(mdev, "SET_ICM_SIZE command returned %d, aborting.\n", err); + return err; + } + + mthca_dbg(mdev, "%lld KB of HCA context requires %lld KB aux memory.\n", + (unsigned long long) icm_size >> 10, + (unsigned long long) aux_pages << 2); + + mdev->fw.arbel.aux_icm = mthca_alloc_icm(mdev, aux_pages, + GFP_HIGHUSER | __GFP_NOWARN, 0); + if (!mdev->fw.arbel.aux_icm) { + mthca_err(mdev, "Couldn't allocate aux memory, aborting.\n"); + return -ENOMEM; + } + + err = mthca_MAP_ICM_AUX(mdev, mdev->fw.arbel.aux_icm); + if (err) { + mthca_err(mdev, "MAP_ICM_AUX returned %d, aborting.\n", err); + goto err_free_aux; + } + + err = mthca_map_eq_icm(mdev, init_hca->eqc_base); + if (err) { + mthca_err(mdev, "Failed to map EQ context memory, aborting.\n"); + goto err_unmap_aux; + } + + /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ + mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * mdev->limits.mtt_seg_size, + dma_get_cache_alignment()) / mdev->limits.mtt_seg_size; + + mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base, + mdev->limits.mtt_seg_size, + mdev->limits.num_mtt_segs, + mdev->limits.reserved_mtts, + 1, 0); + if (!mdev->mr_table.mtt_table) { + mthca_err(mdev, "Failed to map MTT context memory, aborting.\n"); + err = -ENOMEM; + goto err_unmap_eq; + } + + mdev->mr_table.mpt_table = mthca_alloc_icm_table(mdev, init_hca->mpt_base, + dev_lim->mpt_entry_sz, + mdev->limits.num_mpts, + mdev->limits.reserved_mrws, + 1, 1); + if (!mdev->mr_table.mpt_table) { + mthca_err(mdev, "Failed to map MPT context memory, aborting.\n"); + err = -ENOMEM; + goto err_unmap_mtt; + } + + mdev->qp_table.qp_table = mthca_alloc_icm_table(mdev, init_hca->qpc_base, + dev_lim->qpc_entry_sz, + mdev->limits.num_qps, + mdev->limits.reserved_qps, + 0, 0); + if (!mdev->qp_table.qp_table) { + mthca_err(mdev, "Failed to map QP context memory, aborting.\n"); + err = -ENOMEM; + goto err_unmap_mpt; + } + + mdev->qp_table.eqp_table = mthca_alloc_icm_table(mdev, init_hca->eqpc_base, + dev_lim->eqpc_entry_sz, + mdev->limits.num_qps, + mdev->limits.reserved_qps, + 0, 0); + if (!mdev->qp_table.eqp_table) { + mthca_err(mdev, "Failed to map EQP context memory, aborting.\n"); + err = -ENOMEM; + goto err_unmap_qp; + } + + mdev->qp_table.rdb_table = mthca_alloc_icm_table(mdev, init_hca->rdb_base, + MTHCA_RDB_ENTRY_SIZE, + mdev->limits.num_qps << + mdev->qp_table.rdb_shift, 0, + 0, 0); + if (!mdev->qp_table.rdb_table) { + mthca_err(mdev, "Failed to map RDB context memory, aborting\n"); + err = -ENOMEM; + goto err_unmap_eqp; + } + + mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base, + dev_lim->cqc_entry_sz, + mdev->limits.num_cqs, + mdev->limits.reserved_cqs, + 0, 0); + if (!mdev->cq_table.table) { + mthca_err(mdev, "Failed to map CQ context memory, aborting.\n"); + err = -ENOMEM; + goto err_unmap_rdb; + } + + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) { + mdev->srq_table.table = + mthca_alloc_icm_table(mdev, init_hca->srqc_base, + dev_lim->srq_entry_sz, + mdev->limits.num_srqs, + mdev->limits.reserved_srqs, + 0, 0); + if (!mdev->srq_table.table) { + mthca_err(mdev, "Failed to map SRQ context memory, " + "aborting.\n"); + err = -ENOMEM; + goto err_unmap_cq; + } + } + + /* + * It's not strictly required, but for simplicity just map the + * whole multicast group table now. The table isn't very big + * and it's a lot easier than trying to track ref counts. + */ + mdev->mcg_table.table = mthca_alloc_icm_table(mdev, init_hca->mc_base, + MTHCA_MGM_ENTRY_SIZE, + mdev->limits.num_mgms + + mdev->limits.num_amgms, + mdev->limits.num_mgms + + mdev->limits.num_amgms, + 0, 0); + if (!mdev->mcg_table.table) { + mthca_err(mdev, "Failed to map MCG context memory, aborting.\n"); + err = -ENOMEM; + goto err_unmap_srq; + } + + return 0; + +err_unmap_srq: + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + mthca_free_icm_table(mdev, mdev->srq_table.table); + +err_unmap_cq: + mthca_free_icm_table(mdev, mdev->cq_table.table); + +err_unmap_rdb: + mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); + +err_unmap_eqp: + mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); + +err_unmap_qp: + mthca_free_icm_table(mdev, mdev->qp_table.qp_table); + +err_unmap_mpt: + mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); + +err_unmap_mtt: + mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); + +err_unmap_eq: + mthca_unmap_eq_icm(mdev); + +err_unmap_aux: + mthca_UNMAP_ICM_AUX(mdev); + +err_free_aux: + mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0); + + return err; +} + +static void mthca_free_icms(struct mthca_dev *mdev) +{ + + mthca_free_icm_table(mdev, mdev->mcg_table.table); + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + mthca_free_icm_table(mdev, mdev->srq_table.table); + mthca_free_icm_table(mdev, mdev->cq_table.table); + mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); + mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); + mthca_free_icm_table(mdev, mdev->qp_table.qp_table); + mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); + mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); + mthca_unmap_eq_icm(mdev); + + mthca_UNMAP_ICM_AUX(mdev); + mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0); +} + +static int mthca_init_arbel(struct mthca_dev *mdev) +{ + struct mthca_dev_lim dev_lim; + struct mthca_profile profile; + struct mthca_init_hca_param init_hca; + s64 icm_size; + int err; + + err = mthca_QUERY_FW(mdev); + if (err) { + mthca_err(mdev, "QUERY_FW command failed %d, aborting.\n", err); + return err; + } + + err = mthca_ENABLE_LAM(mdev); + if (err == -EAGAIN) { + mthca_dbg(mdev, "No HCA-attached memory (running in MemFree mode)\n"); + mdev->mthca_flags |= MTHCA_FLAG_NO_LAM; + } else if (err) { + mthca_err(mdev, "ENABLE_LAM returned %d, aborting.\n", err); + return err; + } + + err = mthca_load_fw(mdev); + if (err) { + mthca_err(mdev, "Loading FW returned %d, aborting.\n", err); + goto err_disable; + } + + err = mthca_dev_lim(mdev, &dev_lim); + if (err) { + mthca_err(mdev, "QUERY_DEV_LIM returned %d, aborting.\n", err); + goto err_stop_fw; + } + + profile = hca_profile; + profile.num_uar = dev_lim.uar_size / PAGE_SIZE; + profile.num_udav = 0; + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + profile.num_srq = dev_lim.max_srqs; + + icm_size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca); + if (icm_size < 0) { + err = icm_size; + goto err_stop_fw; + } + + err = mthca_init_icm(mdev, &dev_lim, &init_hca, icm_size); + if (err) + goto err_stop_fw; + + err = mthca_INIT_HCA(mdev, &init_hca); + if (err) { + mthca_err(mdev, "INIT_HCA command returned %d, aborting.\n", err); + goto err_free_icm; + } + + return 0; + +err_free_icm: + mthca_free_icms(mdev); + +err_stop_fw: + mthca_UNMAP_FA(mdev); + mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0); + +err_disable: + if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) + mthca_DISABLE_LAM(mdev); + + return err; +} + +static void mthca_close_hca(struct mthca_dev *mdev) +{ + mthca_CLOSE_HCA(mdev, 0); + + if (mthca_is_memfree(mdev)) { + mthca_free_icms(mdev); + + mthca_UNMAP_FA(mdev); + mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0); + + if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) + mthca_DISABLE_LAM(mdev); + } else + mthca_SYS_DIS(mdev); +} + +static int mthca_init_hca(struct mthca_dev *mdev) +{ + int err; + struct mthca_adapter adapter; + + if (mthca_is_memfree(mdev)) + err = mthca_init_arbel(mdev); + else + err = mthca_init_tavor(mdev); + + if (err) + return err; + + err = mthca_QUERY_ADAPTER(mdev, &adapter); + if (err) { + mthca_err(mdev, "QUERY_ADAPTER command returned %d, aborting.\n", err); + goto err_close; + } + + mdev->eq_table.inta_pin = adapter.inta_pin; + if (!mthca_is_memfree(mdev)) + mdev->rev_id = adapter.revision_id; + memcpy(mdev->board_id, adapter.board_id, sizeof mdev->board_id); + + return 0; + +err_close: + mthca_close_hca(mdev); + return err; +} + +static int mthca_setup_hca(struct mthca_dev *dev) +{ + int err; + + MTHCA_INIT_DOORBELL_LOCK(&dev->doorbell_lock); + + err = mthca_init_uar_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "user access region table, aborting.\n"); + return err; + } + + err = mthca_uar_alloc(dev, &dev->driver_uar); + if (err) { + mthca_err(dev, "Failed to allocate driver access region, " + "aborting.\n"); + goto err_uar_table_free; + } + + dev->kar = ioremap((phys_addr_t) dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); + if (!dev->kar) { + mthca_err(dev, "Couldn't map kernel access region, " + "aborting.\n"); + err = -ENOMEM; + goto err_uar_free; + } + + err = mthca_init_pd_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "protection domain table, aborting.\n"); + goto err_kar_unmap; + } + + err = mthca_init_mr_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "memory region table, aborting.\n"); + goto err_pd_table_free; + } + + err = mthca_pd_alloc(dev, 1, &dev->driver_pd); + if (err) { + mthca_err(dev, "Failed to create driver PD, " + "aborting.\n"); + goto err_mr_table_free; + } + + err = mthca_init_eq_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "event queue table, aborting.\n"); + goto err_pd_free; + } + + err = mthca_cmd_use_events(dev); + if (err) { + mthca_err(dev, "Failed to switch to event-driven " + "firmware commands, aborting.\n"); + goto err_eq_table_free; + } + + err = mthca_NOP(dev); + if (err) { + if (dev->mthca_flags & MTHCA_FLAG_MSI_X) { + mthca_warn(dev, "NOP command failed to generate interrupt " + "(IRQ %d).\n", + dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector); + mthca_warn(dev, "Trying again with MSI-X disabled.\n"); + } else { + mthca_err(dev, "NOP command failed to generate interrupt " + "(IRQ %d), aborting.\n", + dev->pdev->irq); + mthca_err(dev, "BIOS or ACPI interrupt routing problem?\n"); + } + + goto err_cmd_poll; + } + + mthca_dbg(dev, "NOP command IRQ test passed\n"); + + err = mthca_init_cq_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "completion queue table, aborting.\n"); + goto err_cmd_poll; + } + + err = mthca_init_srq_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "shared receive queue table, aborting.\n"); + goto err_cq_table_free; + } + + err = mthca_init_qp_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "queue pair table, aborting.\n"); + goto err_srq_table_free; + } + + err = mthca_init_av_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "address vector table, aborting.\n"); + goto err_qp_table_free; + } + + err = mthca_init_mcg_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "multicast group table, aborting.\n"); + goto err_av_table_free; + } + + return 0; + +err_av_table_free: + mthca_cleanup_av_table(dev); + +err_qp_table_free: + mthca_cleanup_qp_table(dev); + +err_srq_table_free: + mthca_cleanup_srq_table(dev); + +err_cq_table_free: + mthca_cleanup_cq_table(dev); + +err_cmd_poll: + mthca_cmd_use_polling(dev); + +err_eq_table_free: + mthca_cleanup_eq_table(dev); + +err_pd_free: + mthca_pd_free(dev, &dev->driver_pd); + +err_mr_table_free: + mthca_cleanup_mr_table(dev); + +err_pd_table_free: + mthca_cleanup_pd_table(dev); + +err_kar_unmap: + iounmap(dev->kar); + +err_uar_free: + mthca_uar_free(dev, &dev->driver_uar); + +err_uar_table_free: + mthca_cleanup_uar_table(dev); + return err; +} + +static int mthca_enable_msi_x(struct mthca_dev *mdev) +{ + struct msix_entry entries[3]; + int err; + + entries[0].entry = 0; + entries[1].entry = 1; + entries[2].entry = 2; + + err = pci_enable_msix(mdev->pdev, entries, ARRAY_SIZE(entries)); + if (err) { + if (err > 0) + mthca_info(mdev, "Only %d MSI-X vectors available, " + "not using MSI-X\n", err); + return err; + } + + mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector; + mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector; + mdev->eq_table.eq[MTHCA_EQ_CMD ].msi_x_vector = entries[2].vector; + + return 0; +} + +/* Types of supported HCA */ +enum { + TAVOR, /* MT23108 */ + ARBEL_COMPAT, /* MT25208 in Tavor compat mode */ + ARBEL_NATIVE, /* MT25208 with extended features */ + SINAI /* MT25204 */ +}; + +#define MTHCA_FW_VER(major, minor, subminor) \ + (((u64) (major) << 32) | ((u64) (minor) << 16) | (u64) (subminor)) + +static struct { + u64 latest_fw; + u32 flags; +} mthca_hca_table[] = { + [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 5, 0), + .flags = 0 }, + [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200), + .flags = MTHCA_FLAG_PCIE }, + [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 3, 0), + .flags = MTHCA_FLAG_MEMFREE | + MTHCA_FLAG_PCIE }, + [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 2, 0), + .flags = MTHCA_FLAG_MEMFREE | + MTHCA_FLAG_PCIE | + MTHCA_FLAG_SINAI_OPT } +}; + +static int __mthca_init_one(struct pci_dev *pdev, int hca_type) +{ + int ddr_hidden = 0; + int err; + struct mthca_dev *mdev; + + printk(KERN_INFO PFX "Initializing %s\n", + pci_name(pdev)); + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "Cannot enable PCI device, " + "aborting.\n"); + return err; + } + + /* + * Check for BARs. We expect 0: 1MB, 2: 8MB, 4: DDR (may not + * be present) + */ + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) || + pci_resource_len(pdev, 0) != 1 << 20) { + dev_err(&pdev->dev, "Missing DCS, aborting.\n"); + err = -ENODEV; + goto err_disable_pdev; + } + if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "Missing UAR, aborting.\n"); + err = -ENODEV; + goto err_disable_pdev; + } + if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM)) + ddr_hidden = 1; + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(&pdev->dev, "Cannot obtain PCI resources, " + "aborting.\n"); + goto err_disable_pdev; + } + + pci_set_master(pdev); + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) { + dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n"); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); + goto err_free_res; + } + } + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) { + dev_warn(&pdev->dev, "Warning: couldn't set 64-bit " + "consistent PCI DMA mask.\n"); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, " + "aborting.\n"); + goto err_free_res; + } + } + + /* We can handle large RDMA requests, so allow larger segments. */ + dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); + + mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev); + if (!mdev) { + dev_err(&pdev->dev, "Device struct alloc failed, " + "aborting.\n"); + err = -ENOMEM; + goto err_free_res; + } + + mdev->pdev = pdev; + + mdev->mthca_flags = mthca_hca_table[hca_type].flags; + if (ddr_hidden) + mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN; + + /* + * Now reset the HCA before we touch the PCI capabilities or + * attempt a firmware command, since a boot ROM may have left + * the HCA in an undefined state. + */ + err = mthca_reset(mdev); + if (err) { + mthca_err(mdev, "Failed to reset HCA, aborting.\n"); + goto err_free_dev; + } + + if (mthca_cmd_init(mdev)) { + mthca_err(mdev, "Failed to init command interface, aborting.\n"); + goto err_free_dev; + } + + err = mthca_tune_pci(mdev); + if (err) + goto err_cmd; + + err = mthca_init_hca(mdev); + if (err) + goto err_cmd; + + if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) { + mthca_warn(mdev, "HCA FW version %d.%d.%03d is old (%d.%d.%03d is current).\n", + (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, + (int) (mdev->fw_ver & 0xffff), + (int) (mthca_hca_table[hca_type].latest_fw >> 32), + (int) (mthca_hca_table[hca_type].latest_fw >> 16) & 0xffff, + (int) (mthca_hca_table[hca_type].latest_fw & 0xffff)); + mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n"); + } + + if (msi_x && !mthca_enable_msi_x(mdev)) + mdev->mthca_flags |= MTHCA_FLAG_MSI_X; + + err = mthca_setup_hca(mdev); + if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) { + if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) + pci_disable_msix(pdev); + mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X; + + err = mthca_setup_hca(mdev); + } + + if (err) + goto err_close; + + err = mthca_register_device(mdev); + if (err) + goto err_cleanup; + + err = mthca_create_agents(mdev); + if (err) + goto err_unregister; + + pci_set_drvdata(pdev, mdev); + mdev->hca_type = hca_type; + + mdev->active = true; + + return 0; + +err_unregister: + mthca_unregister_device(mdev); + +err_cleanup: + mthca_cleanup_mcg_table(mdev); + mthca_cleanup_av_table(mdev); + mthca_cleanup_qp_table(mdev); + mthca_cleanup_srq_table(mdev); + mthca_cleanup_cq_table(mdev); + mthca_cmd_use_polling(mdev); + mthca_cleanup_eq_table(mdev); + + mthca_pd_free(mdev, &mdev->driver_pd); + + mthca_cleanup_mr_table(mdev); + mthca_cleanup_pd_table(mdev); + mthca_cleanup_uar_table(mdev); + +err_close: + if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) + pci_disable_msix(pdev); + + mthca_close_hca(mdev); + +err_cmd: + mthca_cmd_cleanup(mdev); + +err_free_dev: + ib_dealloc_device(&mdev->ib_dev); + +err_free_res: + pci_release_regions(pdev); + +err_disable_pdev: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + return err; +} + +static void __mthca_remove_one(struct pci_dev *pdev) +{ + struct mthca_dev *mdev = pci_get_drvdata(pdev); + int p; + + if (mdev) { + mthca_free_agents(mdev); + mthca_unregister_device(mdev); + + for (p = 1; p <= mdev->limits.num_ports; ++p) + mthca_CLOSE_IB(mdev, p); + + mthca_cleanup_mcg_table(mdev); + mthca_cleanup_av_table(mdev); + mthca_cleanup_qp_table(mdev); + mthca_cleanup_srq_table(mdev); + mthca_cleanup_cq_table(mdev); + mthca_cmd_use_polling(mdev); + mthca_cleanup_eq_table(mdev); + + mthca_pd_free(mdev, &mdev->driver_pd); + + mthca_cleanup_mr_table(mdev); + mthca_cleanup_pd_table(mdev); + + iounmap(mdev->kar); + mthca_uar_free(mdev, &mdev->driver_uar); + mthca_cleanup_uar_table(mdev); + mthca_close_hca(mdev); + mthca_cmd_cleanup(mdev); + + if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) + pci_disable_msix(pdev); + + ib_dealloc_device(&mdev->ib_dev); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + } +} + +int __mthca_restart_one(struct pci_dev *pdev) +{ + struct mthca_dev *mdev; + int hca_type; + + mdev = pci_get_drvdata(pdev); + if (!mdev) + return -ENODEV; + hca_type = mdev->hca_type; + __mthca_remove_one(pdev); + return __mthca_init_one(pdev, hca_type); +} + +static int __devinit mthca_init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int ret; + + mutex_lock(&mthca_device_mutex); + + printk_once(KERN_INFO "%s", mthca_version); + + if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) { + printk(KERN_ERR PFX "%s has invalid driver data %lx\n", + pci_name(pdev), id->driver_data); + mutex_unlock(&mthca_device_mutex); + return -ENODEV; + } + + ret = __mthca_init_one(pdev, id->driver_data); + + mutex_unlock(&mthca_device_mutex); + + return ret; +} + +static void __devexit mthca_remove_one(struct pci_dev *pdev) +{ + mutex_lock(&mthca_device_mutex); + __mthca_remove_one(pdev); + mutex_unlock(&mthca_device_mutex); +} + +static struct pci_device_id mthca_pci_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR), + .driver_data = TAVOR }, + { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_TAVOR), + .driver_data = TAVOR }, + { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT), + .driver_data = ARBEL_COMPAT }, + { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT), + .driver_data = ARBEL_COMPAT }, + { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL), + .driver_data = ARBEL_NATIVE }, + { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL), + .driver_data = ARBEL_NATIVE }, + { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI), + .driver_data = SINAI }, + { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_SINAI), + .driver_data = SINAI }, + { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI_OLD), + .driver_data = SINAI }, + { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_SINAI_OLD), + .driver_data = SINAI }, + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, mthca_pci_table); + +static struct pci_driver mthca_driver = { + .name = DRV_NAME, + .id_table = mthca_pci_table, + .probe = mthca_init_one, + .remove = __devexit_p(mthca_remove_one) +}; + +static void __init __mthca_check_profile_val(const char *name, int *pval, + int pval_default) +{ + /* value must be positive and power of 2 */ + int old_pval = *pval; + + if (old_pval <= 0) + *pval = pval_default; + else + *pval = roundup_pow_of_two(old_pval); + + if (old_pval != *pval) { + printk(KERN_WARNING PFX "Invalid value %d for %s in module parameter.\n", + old_pval, name); + printk(KERN_WARNING PFX "Corrected %s to %d.\n", name, *pval); + } +} + +#define mthca_check_profile_val(name, default) \ + __mthca_check_profile_val(#name, &hca_profile.name, default) + +static void __init mthca_validate_profile(void) +{ + mthca_check_profile_val(num_qp, MTHCA_DEFAULT_NUM_QP); + mthca_check_profile_val(rdb_per_qp, MTHCA_DEFAULT_RDB_PER_QP); + mthca_check_profile_val(num_cq, MTHCA_DEFAULT_NUM_CQ); + mthca_check_profile_val(num_mcg, MTHCA_DEFAULT_NUM_MCG); + mthca_check_profile_val(num_mpt, MTHCA_DEFAULT_NUM_MPT); + mthca_check_profile_val(num_mtt, MTHCA_DEFAULT_NUM_MTT); + mthca_check_profile_val(num_udav, MTHCA_DEFAULT_NUM_UDAV); + mthca_check_profile_val(fmr_reserved_mtts, MTHCA_DEFAULT_NUM_RESERVED_MTTS); + + if (hca_profile.fmr_reserved_mtts >= hca_profile.num_mtt) { + printk(KERN_WARNING PFX "Invalid fmr_reserved_mtts module parameter %d.\n", + hca_profile.fmr_reserved_mtts); + printk(KERN_WARNING PFX "(Must be smaller than num_mtt %d)\n", + hca_profile.num_mtt); + hca_profile.fmr_reserved_mtts = hca_profile.num_mtt / 2; + printk(KERN_WARNING PFX "Corrected fmr_reserved_mtts to %d.\n", + hca_profile.fmr_reserved_mtts); + } + + if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) { + printk(KERN_WARNING PFX "bad log_mtts_per_seg (%d). Using default - %d\n", + log_mtts_per_seg, ilog2(MTHCA_MTT_SEG_SIZE / 8)); + log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8); + } +} + +static int __init mthca_init(void) +{ + int ret; + + mthca_validate_profile(); + + ret = mthca_catas_init(); + if (ret) + return ret; + + ret = pci_register_driver(&mthca_driver); + if (ret < 0) { + mthca_catas_cleanup(); + return ret; + } + + return 0; +} + +static void __exit mthca_cleanup(void) +{ + pci_unregister_driver(&mthca_driver); + mthca_catas_cleanup(); +} + +module_init(mthca_init); +module_exit(mthca_cleanup); diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c new file mode 100644 index 00000000..6304ae8f --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "mthca_dev.h" +#include "mthca_cmd.h" + +struct mthca_mgm { + __be32 next_gid_index; + u32 reserved[3]; + u8 gid[16]; + __be32 qp[MTHCA_QP_PER_MGM]; +}; + +static const u8 zero_gid[16]; /* automatically initialized to 0 */ + +/* + * Caller must hold MCG table semaphore. gid and mgm parameters must + * be properly aligned for command interface. + * + * Returns 0 unless a firmware command error occurs. + * + * If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1 + * and *mgm holds MGM entry. + * + * if GID is found in AMGM, *index = index in AMGM, *prev = index of + * previous entry in hash chain and *mgm holds AMGM entry. + * + * If no AMGM exists for given gid, *index = -1, *prev = index of last + * entry in hash chain and *mgm holds end of hash chain. + */ +static int find_mgm(struct mthca_dev *dev, + u8 *gid, struct mthca_mailbox *mgm_mailbox, + u16 *hash, int *prev, int *index) +{ + struct mthca_mailbox *mailbox; + struct mthca_mgm *mgm = mgm_mailbox->buf; + u8 *mgid; + int err; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return -ENOMEM; + mgid = mailbox->buf; + + memcpy(mgid, gid, 16); + + err = mthca_MGID_HASH(dev, mailbox, hash); + if (err) { + mthca_err(dev, "MGID_HASH failed (%d)\n", err); + goto out; + } + + if (0) + mthca_dbg(dev, "Hash for %pI6 is %04x\n", gid, *hash); + + *index = *hash; + *prev = -1; + + do { + err = mthca_READ_MGM(dev, *index, mgm_mailbox); + if (err) { + mthca_err(dev, "READ_MGM failed (%d)\n", err); + goto out; + } + + if (!memcmp(mgm->gid, zero_gid, 16)) { + if (*index != *hash) { + mthca_err(dev, "Found zero MGID in AMGM.\n"); + err = -EINVAL; + } + goto out; + } + + if (!memcmp(mgm->gid, gid, 16)) + goto out; + + *prev = *index; + *index = be32_to_cpu(mgm->next_gid_index) >> 6; + } while (*index); + + *index = -1; + + out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_mailbox *mailbox; + struct mthca_mgm *mgm; + u16 hash; + int index, prev; + int link = 0; + int i; + int err; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mgm = mailbox->buf; + + mutex_lock(&dev->mcg_table.mutex); + + err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); + if (err) + goto out; + + if (index != -1) { + if (!memcmp(mgm->gid, zero_gid, 16)) + memcpy(mgm->gid, gid->raw, 16); + } else { + link = 1; + + index = mthca_alloc(&dev->mcg_table.alloc); + if (index == -1) { + mthca_err(dev, "No AMGM entries left\n"); + err = -ENOMEM; + goto out; + } + + err = mthca_READ_MGM(dev, index, mailbox); + if (err) { + mthca_err(dev, "READ_MGM failed (%d)\n", err); + goto out; + } + memset(mgm, 0, sizeof *mgm); + memcpy(mgm->gid, gid->raw, 16); + } + + for (i = 0; i < MTHCA_QP_PER_MGM; ++i) + if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) { + mthca_dbg(dev, "QP %06x already a member of MGM\n", + ibqp->qp_num); + err = 0; + goto out; + } else if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) { + mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31)); + break; + } + + if (i == MTHCA_QP_PER_MGM) { + mthca_err(dev, "MGM at index %x is full.\n", index); + err = -ENOMEM; + goto out; + } + + err = mthca_WRITE_MGM(dev, index, mailbox); + if (err) { + mthca_err(dev, "WRITE_MGM failed %d\n", err); + err = -EINVAL; + goto out; + } + + if (!link) + goto out; + + err = mthca_READ_MGM(dev, prev, mailbox); + if (err) { + mthca_err(dev, "READ_MGM failed %d\n", err); + goto out; + } + + mgm->next_gid_index = cpu_to_be32(index << 6); + + err = mthca_WRITE_MGM(dev, prev, mailbox); + if (err) + mthca_err(dev, "WRITE_MGM returned %d\n", err); + + out: + if (err && link && index != -1) { + BUG_ON(index < dev->limits.num_mgms); + mthca_free(&dev->mcg_table.alloc, index); + } + mutex_unlock(&dev->mcg_table.mutex); + + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_mailbox *mailbox; + struct mthca_mgm *mgm; + u16 hash; + int prev, index; + int i, loc; + int err; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mgm = mailbox->buf; + + mutex_lock(&dev->mcg_table.mutex); + + err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); + if (err) + goto out; + + if (index == -1) { + mthca_err(dev, "MGID %pI6 not found\n", gid->raw); + err = -EINVAL; + goto out; + } + + for (loc = -1, i = 0; i < MTHCA_QP_PER_MGM; ++i) { + if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) + loc = i; + if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) + break; + } + + if (loc == -1) { + mthca_err(dev, "QP %06x not found in MGM\n", ibqp->qp_num); + err = -EINVAL; + goto out; + } + + mgm->qp[loc] = mgm->qp[i - 1]; + mgm->qp[i - 1] = 0; + + err = mthca_WRITE_MGM(dev, index, mailbox); + if (err) { + mthca_err(dev, "WRITE_MGM returned %d\n", err); + goto out; + } + + if (i != 1) + goto out; + + if (prev == -1) { + /* Remove entry from MGM */ + int amgm_index_to_free = be32_to_cpu(mgm->next_gid_index) >> 6; + if (amgm_index_to_free) { + err = mthca_READ_MGM(dev, amgm_index_to_free, + mailbox); + if (err) { + mthca_err(dev, "READ_MGM returned %d\n", err); + goto out; + } + } else + memset(mgm->gid, 0, 16); + + err = mthca_WRITE_MGM(dev, index, mailbox); + if (err) { + mthca_err(dev, "WRITE_MGM returned %d\n", err); + goto out; + } + if (amgm_index_to_free) { + BUG_ON(amgm_index_to_free < dev->limits.num_mgms); + mthca_free(&dev->mcg_table.alloc, amgm_index_to_free); + } + } else { + /* Remove entry from AMGM */ + int curr_next_index = be32_to_cpu(mgm->next_gid_index) >> 6; + err = mthca_READ_MGM(dev, prev, mailbox); + if (err) { + mthca_err(dev, "READ_MGM returned %d\n", err); + goto out; + } + + mgm->next_gid_index = cpu_to_be32(curr_next_index << 6); + + err = mthca_WRITE_MGM(dev, prev, mailbox); + if (err) { + mthca_err(dev, "WRITE_MGM returned %d\n", err); + goto out; + } + BUG_ON(index < dev->limits.num_mgms); + mthca_free(&dev->mcg_table.alloc, index); + } + + out: + mutex_unlock(&dev->mcg_table.mutex); + + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_init_mcg_table(struct mthca_dev *dev) +{ + int err; + int table_size = dev->limits.num_mgms + dev->limits.num_amgms; + + err = mthca_alloc_init(&dev->mcg_table.alloc, + table_size, + table_size - 1, + dev->limits.num_mgms); + if (err) + return err; + + mutex_init(&dev->mcg_table.mutex); + + return 0; +} + +void mthca_cleanup_mcg_table(struct mthca_dev *dev) +{ + mthca_alloc_cleanup(&dev->mcg_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c new file mode 100644 index 00000000..7d2e42dd --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -0,0 +1,760 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include + +#include + +#include "mthca_memfree.h" +#include "mthca_dev.h" +#include "mthca_cmd.h" + +/* + * We allocate in as big chunks as we can, up to a maximum of 256 KB + * per chunk. + */ +enum { + MTHCA_ICM_ALLOC_SIZE = 1 << 18, + MTHCA_TABLE_CHUNK_SIZE = 1 << 18 +}; + +struct mthca_user_db_table { + struct mutex mutex; + struct { + u64 uvirt; + struct scatterlist mem; + int refcount; + } page[0]; +}; + +static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *chunk) +{ + int i; + + if (chunk->nsg > 0) + pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages, + PCI_DMA_BIDIRECTIONAL); + + for (i = 0; i < chunk->npages; ++i) + __free_pages(sg_page(&chunk->mem[i]), + get_order(chunk->mem[i].length)); +} + +static void mthca_free_icm_coherent(struct mthca_dev *dev, struct mthca_icm_chunk *chunk) +{ + int i; + + for (i = 0; i < chunk->npages; ++i) { + dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length, + lowmem_page_address(sg_page(&chunk->mem[i])), + sg_dma_address(&chunk->mem[i])); + } +} + +void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent) +{ + struct mthca_icm_chunk *chunk, *tmp; + + if (!icm) + return; + + list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) { + if (coherent) + mthca_free_icm_coherent(dev, chunk); + else + mthca_free_icm_pages(dev, chunk); + + kfree(chunk); + } + + kfree(icm); +} + +static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask) +{ + struct page *page; + + /* + * Use __GFP_ZERO because buggy firmware assumes ICM pages are + * cleared, and subtle failures are seen if they aren't. + */ + page = alloc_pages(gfp_mask | __GFP_ZERO, order); + if (!page) + return -ENOMEM; + + sg_set_page(mem, page, PAGE_SIZE << order, 0); + return 0; +} + +static int mthca_alloc_icm_coherent(struct device *dev, struct scatterlist *mem, + int order, gfp_t gfp_mask) +{ + void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, &sg_dma_address(mem), + gfp_mask); + if (!buf) + return -ENOMEM; + + sg_set_buf(mem, buf, PAGE_SIZE << order); + BUG_ON(mem->offset); + sg_dma_len(mem) = PAGE_SIZE << order; + return 0; +} + +struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, + gfp_t gfp_mask, int coherent) +{ + struct mthca_icm *icm; + struct mthca_icm_chunk *chunk = NULL; + int cur_order; + int ret; + + /* We use sg_set_buf for coherent allocs, which assumes low memory */ + BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM)); + + icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); + if (!icm) + return icm; + + icm->refcount = 0; + INIT_LIST_HEAD(&icm->chunk_list); + + cur_order = get_order(MTHCA_ICM_ALLOC_SIZE); + + while (npages > 0) { + if (!chunk) { + chunk = kmalloc(sizeof *chunk, + gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); + if (!chunk) + goto fail; + + sg_init_table(chunk->mem, MTHCA_ICM_CHUNK_LEN); + chunk->npages = 0; + chunk->nsg = 0; + list_add_tail(&chunk->list, &icm->chunk_list); + } + + while (1 << cur_order > npages) + --cur_order; + + if (coherent) + ret = mthca_alloc_icm_coherent(&dev->pdev->dev, + &chunk->mem[chunk->npages], + cur_order, gfp_mask); + else + ret = mthca_alloc_icm_pages(&chunk->mem[chunk->npages], + cur_order, gfp_mask); + + if (!ret) { + ++chunk->npages; + + if (coherent) + ++chunk->nsg; + else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) { + chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, + chunk->npages, + PCI_DMA_BIDIRECTIONAL); + + if (chunk->nsg <= 0) + goto fail; + } + + if (chunk->npages == MTHCA_ICM_CHUNK_LEN) + chunk = NULL; + + npages -= 1 << cur_order; + } else { + --cur_order; + if (cur_order < 0) + goto fail; + } + } + + if (!coherent && chunk) { + chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, + chunk->npages, + PCI_DMA_BIDIRECTIONAL); + + if (chunk->nsg <= 0) + goto fail; + } + + return icm; + +fail: + mthca_free_icm(dev, icm, coherent); + return NULL; +} + +int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj) +{ + int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE; + int ret = 0; + + mutex_lock(&table->mutex); + + if (table->icm[i]) { + ++table->icm[i]->refcount; + goto out; + } + + table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT, + (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | + __GFP_NOWARN, table->coherent); + if (!table->icm[i]) { + ret = -ENOMEM; + goto out; + } + + if (mthca_MAP_ICM(dev, table->icm[i], + table->virt + i * MTHCA_TABLE_CHUNK_SIZE)) { + mthca_free_icm(dev, table->icm[i], table->coherent); + table->icm[i] = NULL; + ret = -ENOMEM; + goto out; + } + + ++table->icm[i]->refcount; + +out: + mutex_unlock(&table->mutex); + return ret; +} + +void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj) +{ + int i; + + if (!mthca_is_memfree(dev)) + return; + + i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE; + + mutex_lock(&table->mutex); + + if (--table->icm[i]->refcount == 0) { + mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE, + MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE); + mthca_free_icm(dev, table->icm[i], table->coherent); + table->icm[i] = NULL; + } + + mutex_unlock(&table->mutex); +} + +void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle) +{ + int idx, offset, dma_offset, i; + struct mthca_icm_chunk *chunk; + struct mthca_icm *icm; + struct page *page = NULL; + + if (!table->lowmem) + return NULL; + + mutex_lock(&table->mutex); + + idx = (obj & (table->num_obj - 1)) * table->obj_size; + icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE]; + dma_offset = offset = idx % MTHCA_TABLE_CHUNK_SIZE; + + if (!icm) + goto out; + + list_for_each_entry(chunk, &icm->chunk_list, list) { + for (i = 0; i < chunk->npages; ++i) { + if (dma_handle && dma_offset >= 0) { + if (sg_dma_len(&chunk->mem[i]) > dma_offset) + *dma_handle = sg_dma_address(&chunk->mem[i]) + + dma_offset; + dma_offset -= sg_dma_len(&chunk->mem[i]); + } + /* DMA mapping can merge pages but not split them, + * so if we found the page, dma_handle has already + * been assigned to. */ + if (chunk->mem[i].length > offset) { + page = sg_page(&chunk->mem[i]); + goto out; + } + offset -= chunk->mem[i].length; + } + } + +out: + mutex_unlock(&table->mutex); + return page ? lowmem_page_address(page) + offset : NULL; +} + +int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table, + int start, int end) +{ + int inc = MTHCA_TABLE_CHUNK_SIZE / table->obj_size; + int i, err; + + for (i = start; i <= end; i += inc) { + err = mthca_table_get(dev, table, i); + if (err) + goto fail; + } + + return 0; + +fail: + while (i > start) { + i -= inc; + mthca_table_put(dev, table, i); + } + + return err; +} + +void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table, + int start, int end) +{ + int i; + + if (!mthca_is_memfree(dev)) + return; + + for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size) + mthca_table_put(dev, table, i); +} + +struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, + u64 virt, int obj_size, + int nobj, int reserved, + int use_lowmem, int use_coherent) +{ + struct mthca_icm_table *table; + int obj_per_chunk; + int num_icm; + unsigned chunk_size; + int i; + + obj_per_chunk = MTHCA_TABLE_CHUNK_SIZE / obj_size; + num_icm = DIV_ROUND_UP(nobj, obj_per_chunk); + + table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL); + if (!table) + return NULL; + + table->virt = virt; + table->num_icm = num_icm; + table->num_obj = nobj; + table->obj_size = obj_size; + table->lowmem = use_lowmem; + table->coherent = use_coherent; + mutex_init(&table->mutex); + + for (i = 0; i < num_icm; ++i) + table->icm[i] = NULL; + + for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) { + chunk_size = MTHCA_TABLE_CHUNK_SIZE; + if ((i + 1) * MTHCA_TABLE_CHUNK_SIZE > nobj * obj_size) + chunk_size = nobj * obj_size - i * MTHCA_TABLE_CHUNK_SIZE; + + table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT, + (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) | + __GFP_NOWARN, use_coherent); + if (!table->icm[i]) + goto err; + if (mthca_MAP_ICM(dev, table->icm[i], + virt + i * MTHCA_TABLE_CHUNK_SIZE)) { + mthca_free_icm(dev, table->icm[i], table->coherent); + table->icm[i] = NULL; + goto err; + } + + /* + * Add a reference to this ICM chunk so that it never + * gets freed (since it contains reserved firmware objects). + */ + ++table->icm[i]->refcount; + } + + return table; + +err: + for (i = 0; i < num_icm; ++i) + if (table->icm[i]) { + mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE, + MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE); + mthca_free_icm(dev, table->icm[i], table->coherent); + } + + kfree(table); + + return NULL; +} + +void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table) +{ + int i; + + for (i = 0; i < table->num_icm; ++i) + if (table->icm[i]) { + mthca_UNMAP_ICM(dev, + table->virt + i * MTHCA_TABLE_CHUNK_SIZE, + MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE); + mthca_free_icm(dev, table->icm[i], table->coherent); + } + + kfree(table); +} + +static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page) +{ + return dev->uar_table.uarc_base + + uar->index * dev->uar_table.uarc_size + + page * MTHCA_ICM_PAGE_SIZE; +} + +int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index, u64 uaddr) +{ + struct page *pages[1]; + int ret = 0; + int i; + + if (!mthca_is_memfree(dev)) + return 0; + + if (index < 0 || index > dev->uar_table.uarc_size / 8) + return -EINVAL; + + mutex_lock(&db_tab->mutex); + + i = index / MTHCA_DB_REC_PER_PAGE; + + if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) || + (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) || + (uaddr & 4095)) { + ret = -EINVAL; + goto out; + } + + if (db_tab->page[i].refcount) { + ++db_tab->page[i].refcount; + goto out; + } + + ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0, + pages, NULL); + if (ret < 0) + goto out; + + sg_set_page(&db_tab->page[i].mem, pages[0], MTHCA_ICM_PAGE_SIZE, + uaddr & ~PAGE_MASK); + + ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + if (ret < 0) { + put_page(pages[0]); + goto out; + } + + ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem), + mthca_uarc_virt(dev, uar, i)); + if (ret) { + pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + put_page(sg_page(&db_tab->page[i].mem)); + goto out; + } + + db_tab->page[i].uvirt = uaddr; + db_tab->page[i].refcount = 1; + +out: + mutex_unlock(&db_tab->mutex); + return ret; +} + +void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index) +{ + if (!mthca_is_memfree(dev)) + return; + + /* + * To make our bookkeeping simpler, we don't unmap DB + * pages until we clean up the whole db table. + */ + + mutex_lock(&db_tab->mutex); + + --db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount; + + mutex_unlock(&db_tab->mutex); +} + +struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev) +{ + struct mthca_user_db_table *db_tab; + int npages; + int i; + + if (!mthca_is_memfree(dev)) + return NULL; + + npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; + db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL); + if (!db_tab) + return ERR_PTR(-ENOMEM); + + mutex_init(&db_tab->mutex); + for (i = 0; i < npages; ++i) { + db_tab->page[i].refcount = 0; + db_tab->page[i].uvirt = 0; + sg_init_table(&db_tab->page[i].mem, 1); + } + + return db_tab; +} + +void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab) +{ + int i; + + if (!mthca_is_memfree(dev)) + return; + + for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) { + if (db_tab->page[i].uvirt) { + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1); + pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + put_page(sg_page(&db_tab->page[i].mem)); + } + } + + kfree(db_tab); +} + +int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, + u32 qn, __be32 **db) +{ + int group; + int start, end, dir; + int i, j; + struct mthca_db_page *page; + int ret = 0; + + mutex_lock(&dev->db_tab->mutex); + + switch (type) { + case MTHCA_DB_TYPE_CQ_ARM: + case MTHCA_DB_TYPE_SQ: + group = 0; + start = 0; + end = dev->db_tab->max_group1; + dir = 1; + break; + + case MTHCA_DB_TYPE_CQ_SET_CI: + case MTHCA_DB_TYPE_RQ: + case MTHCA_DB_TYPE_SRQ: + group = 1; + start = dev->db_tab->npages - 1; + end = dev->db_tab->min_group2; + dir = -1; + break; + + default: + ret = -EINVAL; + goto out; + } + + for (i = start; i != end; i += dir) + if (dev->db_tab->page[i].db_rec && + !bitmap_full(dev->db_tab->page[i].used, + MTHCA_DB_REC_PER_PAGE)) { + page = dev->db_tab->page + i; + goto found; + } + + for (i = start; i != end; i += dir) + if (!dev->db_tab->page[i].db_rec) { + page = dev->db_tab->page + i; + goto alloc; + } + + if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) { + ret = -ENOMEM; + goto out; + } + + if (group == 0) + ++dev->db_tab->max_group1; + else + --dev->db_tab->min_group2; + + page = dev->db_tab->page + end; + +alloc: + page->db_rec = dma_alloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, + &page->mapping, GFP_KERNEL); + if (!page->db_rec) { + ret = -ENOMEM; + goto out; + } + memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE); + + ret = mthca_MAP_ICM_page(dev, page->mapping, + mthca_uarc_virt(dev, &dev->driver_uar, i)); + if (ret) { + dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, + page->db_rec, page->mapping); + goto out; + } + + bitmap_zero(page->used, MTHCA_DB_REC_PER_PAGE); + +found: + j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE); + set_bit(j, page->used); + + if (group == 1) + j = MTHCA_DB_REC_PER_PAGE - 1 - j; + + ret = i * MTHCA_DB_REC_PER_PAGE + j; + + page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5)); + + *db = (__be32 *) &page->db_rec[j]; + +out: + mutex_unlock(&dev->db_tab->mutex); + + return ret; +} + +void mthca_free_db(struct mthca_dev *dev, int type, int db_index) +{ + int i, j; + struct mthca_db_page *page; + + i = db_index / MTHCA_DB_REC_PER_PAGE; + j = db_index % MTHCA_DB_REC_PER_PAGE; + + page = dev->db_tab->page + i; + + mutex_lock(&dev->db_tab->mutex); + + page->db_rec[j] = 0; + if (i >= dev->db_tab->min_group2) + j = MTHCA_DB_REC_PER_PAGE - 1 - j; + clear_bit(j, page->used); + + if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) && + i >= dev->db_tab->max_group1 - 1) { + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1); + + dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, + page->db_rec, page->mapping); + page->db_rec = NULL; + + if (i == dev->db_tab->max_group1) { + --dev->db_tab->max_group1; + /* XXX may be able to unmap more pages now */ + } + if (i == dev->db_tab->min_group2) + ++dev->db_tab->min_group2; + } + + mutex_unlock(&dev->db_tab->mutex); +} + +int mthca_init_db_tab(struct mthca_dev *dev) +{ + int i; + + if (!mthca_is_memfree(dev)) + return 0; + + dev->db_tab = kmalloc(sizeof *dev->db_tab, GFP_KERNEL); + if (!dev->db_tab) + return -ENOMEM; + + mutex_init(&dev->db_tab->mutex); + + dev->db_tab->npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; + dev->db_tab->max_group1 = 0; + dev->db_tab->min_group2 = dev->db_tab->npages - 1; + + dev->db_tab->page = kmalloc(dev->db_tab->npages * + sizeof *dev->db_tab->page, + GFP_KERNEL); + if (!dev->db_tab->page) { + kfree(dev->db_tab); + return -ENOMEM; + } + + for (i = 0; i < dev->db_tab->npages; ++i) + dev->db_tab->page[i].db_rec = NULL; + + return 0; +} + +void mthca_cleanup_db_tab(struct mthca_dev *dev) +{ + int i; + + if (!mthca_is_memfree(dev)) + return; + + /* + * Because we don't always free our UARC pages when they + * become empty to make mthca_free_db() simpler we need to + * make a sweep through the doorbell pages and free any + * leftover pages now. + */ + for (i = 0; i < dev->db_tab->npages; ++i) { + if (!dev->db_tab->page[i].db_rec) + continue; + + if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE)) + mthca_warn(dev, "Kernel UARC page %d not empty\n", i); + + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1); + + dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, + dev->db_tab->page[i].db_rec, + dev->db_tab->page[i].mapping); + } + + kfree(dev->db_tab->page); + kfree(dev->db_tab); +} diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h new file mode 100644 index 00000000..da9b8f9b --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MTHCA_MEMFREE_H +#define MTHCA_MEMFREE_H + +#include +#include + +#define MTHCA_ICM_CHUNK_LEN \ + ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \ + (sizeof (struct scatterlist))) + +enum { + MTHCA_ICM_PAGE_SHIFT = 12, + MTHCA_ICM_PAGE_SIZE = 1 << MTHCA_ICM_PAGE_SHIFT, + MTHCA_DB_REC_PER_PAGE = MTHCA_ICM_PAGE_SIZE / 8 +}; + +struct mthca_icm_chunk { + struct list_head list; + int npages; + int nsg; + struct scatterlist mem[MTHCA_ICM_CHUNK_LEN]; +}; + +struct mthca_icm { + struct list_head chunk_list; + int refcount; +}; + +struct mthca_icm_table { + u64 virt; + int num_icm; + int num_obj; + int obj_size; + int lowmem; + int coherent; + struct mutex mutex; + struct mthca_icm *icm[0]; +}; + +struct mthca_icm_iter { + struct mthca_icm *icm; + struct mthca_icm_chunk *chunk; + int page_idx; +}; + +struct mthca_dev; + +struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, + gfp_t gfp_mask, int coherent); +void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent); + +struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, + u64 virt, int obj_size, + int nobj, int reserved, + int use_lowmem, int use_coherent); +void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table); +int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj); +void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj); +void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle); +int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table, + int start, int end); +void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table, + int start, int end); + +static inline void mthca_icm_first(struct mthca_icm *icm, + struct mthca_icm_iter *iter) +{ + iter->icm = icm; + iter->chunk = list_empty(&icm->chunk_list) ? + NULL : list_entry(icm->chunk_list.next, + struct mthca_icm_chunk, list); + iter->page_idx = 0; +} + +static inline int mthca_icm_last(struct mthca_icm_iter *iter) +{ + return !iter->chunk; +} + +static inline void mthca_icm_next(struct mthca_icm_iter *iter) +{ + if (++iter->page_idx >= iter->chunk->nsg) { + if (iter->chunk->list.next == &iter->icm->chunk_list) { + iter->chunk = NULL; + return; + } + + iter->chunk = list_entry(iter->chunk->list.next, + struct mthca_icm_chunk, list); + iter->page_idx = 0; + } +} + +static inline dma_addr_t mthca_icm_addr(struct mthca_icm_iter *iter) +{ + return sg_dma_address(&iter->chunk->mem[iter->page_idx]); +} + +static inline unsigned long mthca_icm_size(struct mthca_icm_iter *iter) +{ + return sg_dma_len(&iter->chunk->mem[iter->page_idx]); +} + +struct mthca_db_page { + DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE); + __be64 *db_rec; + dma_addr_t mapping; +}; + +struct mthca_db_table { + int npages; + int max_group1; + int min_group2; + struct mthca_db_page *page; + struct mutex mutex; +}; + +enum mthca_db_type { + MTHCA_DB_TYPE_INVALID = 0x0, + MTHCA_DB_TYPE_CQ_SET_CI = 0x1, + MTHCA_DB_TYPE_CQ_ARM = 0x2, + MTHCA_DB_TYPE_SQ = 0x3, + MTHCA_DB_TYPE_RQ = 0x4, + MTHCA_DB_TYPE_SRQ = 0x5, + MTHCA_DB_TYPE_GROUP_SEP = 0x7 +}; + +struct mthca_user_db_table; +struct mthca_uar; + +int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index, u64 uaddr); +void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index); +struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev); +void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab); + +int mthca_init_db_tab(struct mthca_dev *dev); +void mthca_cleanup_db_tab(struct mthca_dev *dev); +int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, + u32 qn, __be32 **db); +void mthca_free_db(struct mthca_dev *dev, int type, int db_index); + +#endif /* MTHCA_MEMFREE_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c new file mode 100644 index 00000000..ed9a989e --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -0,0 +1,965 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "mthca_dev.h" +#include "mthca_cmd.h" +#include "mthca_memfree.h" + +struct mthca_mtt { + struct mthca_buddy *buddy; + int order; + u32 first_seg; +}; + +/* + * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. + */ +struct mthca_mpt_entry { + __be32 flags; + __be32 page_size; + __be32 key; + __be32 pd; + __be64 start; + __be64 length; + __be32 lkey; + __be32 window_count; + __be32 window_count_limit; + __be64 mtt_seg; + __be32 mtt_sz; /* Arbel only */ + u32 reserved[2]; +} __attribute__((packed)); + +#define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28) +#define MTHCA_MPT_FLAG_MIO (1 << 17) +#define MTHCA_MPT_FLAG_BIND_ENABLE (1 << 15) +#define MTHCA_MPT_FLAG_PHYSICAL (1 << 9) +#define MTHCA_MPT_FLAG_REGION (1 << 8) + +#define MTHCA_MTT_FLAG_PRESENT 1 + +#define MTHCA_MPT_STATUS_SW 0xF0 +#define MTHCA_MPT_STATUS_HW 0x00 + +#define SINAI_FMR_KEY_INC 0x1000000 + +/* + * Buddy allocator for MTT segments (currently not very efficient + * since it doesn't keep a free list and just searches linearly + * through the bitmaps) + */ + +static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order) +{ + int o; + int m; + u32 seg; + + spin_lock(&buddy->lock); + + for (o = order; o <= buddy->max_order; ++o) + if (buddy->num_free[o]) { + m = 1 << (buddy->max_order - o); + seg = find_first_bit(buddy->bits[o], m); + if (seg < m) + goto found; + } + + spin_unlock(&buddy->lock); + return -1; + + found: + clear_bit(seg, buddy->bits[o]); + --buddy->num_free[o]; + + while (o > order) { + --o; + seg <<= 1; + set_bit(seg ^ 1, buddy->bits[o]); + ++buddy->num_free[o]; + } + + spin_unlock(&buddy->lock); + + seg <<= order; + + return seg; +} + +static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order) +{ + seg >>= order; + + spin_lock(&buddy->lock); + + while (test_bit(seg ^ 1, buddy->bits[order])) { + clear_bit(seg ^ 1, buddy->bits[order]); + --buddy->num_free[order]; + seg >>= 1; + ++order; + } + + set_bit(seg, buddy->bits[order]); + ++buddy->num_free[order]; + + spin_unlock(&buddy->lock); +} + +static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) +{ + int i, s; + + buddy->max_order = max_order; + spin_lock_init(&buddy->lock); + + buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), + GFP_KERNEL); + buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free, + GFP_KERNEL); + if (!buddy->bits || !buddy->num_free) + goto err_out; + + for (i = 0; i <= buddy->max_order; ++i) { + s = BITS_TO_LONGS(1 << (buddy->max_order - i)); + buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL); + if (!buddy->bits[i]) + goto err_out_free; + bitmap_zero(buddy->bits[i], + 1 << (buddy->max_order - i)); + } + + set_bit(0, buddy->bits[buddy->max_order]); + buddy->num_free[buddy->max_order] = 1; + + return 0; + +err_out_free: + for (i = 0; i <= buddy->max_order; ++i) + kfree(buddy->bits[i]); + +err_out: + kfree(buddy->bits); + kfree(buddy->num_free); + + return -ENOMEM; +} + +static void mthca_buddy_cleanup(struct mthca_buddy *buddy) +{ + int i; + + for (i = 0; i <= buddy->max_order; ++i) + kfree(buddy->bits[i]); + + kfree(buddy->bits); + kfree(buddy->num_free); +} + +static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order, + struct mthca_buddy *buddy) +{ + u32 seg = mthca_buddy_alloc(buddy, order); + + if (seg == -1) + return -1; + + if (mthca_is_memfree(dev)) + if (mthca_table_get_range(dev, dev->mr_table.mtt_table, seg, + seg + (1 << order) - 1)) { + mthca_buddy_free(buddy, seg, order); + seg = -1; + } + + return seg; +} + +static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size, + struct mthca_buddy *buddy) +{ + struct mthca_mtt *mtt; + int i; + + if (size <= 0) + return ERR_PTR(-EINVAL); + + mtt = kmalloc(sizeof *mtt, GFP_KERNEL); + if (!mtt) + return ERR_PTR(-ENOMEM); + + mtt->buddy = buddy; + mtt->order = 0; + for (i = dev->limits.mtt_seg_size / 8; i < size; i <<= 1) + ++mtt->order; + + mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy); + if (mtt->first_seg == -1) { + kfree(mtt); + return ERR_PTR(-ENOMEM); + } + + return mtt; +} + +struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size) +{ + return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy); +} + +void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt) +{ + if (!mtt) + return; + + mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order); + + mthca_table_put_range(dev, dev->mr_table.mtt_table, + mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); + + kfree(mtt); +} + +static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, + int start_index, u64 *buffer_list, int list_len) +{ + struct mthca_mailbox *mailbox; + __be64 *mtt_entry; + int err = 0; + int i; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mtt_entry = mailbox->buf; + + while (list_len > 0) { + mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base + + mtt->first_seg * dev->limits.mtt_seg_size + + start_index * 8); + mtt_entry[1] = 0; + for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i) + mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] | + MTHCA_MTT_FLAG_PRESENT); + + /* + * If we have an odd number of entries to write, add + * one more dummy entry for firmware efficiency. + */ + if (i & 1) + mtt_entry[i + 2] = 0; + + err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1); + if (err) { + mthca_warn(dev, "WRITE_MTT failed (%d)\n", err); + goto out; + } + + list_len -= i; + start_index += i; + buffer_list += i; + } + +out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_write_mtt_size(struct mthca_dev *dev) +{ + if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy || + !(dev->mthca_flags & MTHCA_FLAG_FMR)) + /* + * Be friendly to WRITE_MTT command + * and leave two empty slots for the + * index and reserved fields of the + * mailbox. + */ + return PAGE_SIZE / sizeof (u64) - 2; + + /* For Arbel, all MTTs must fit in the same page. */ + return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff; +} + +static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, + struct mthca_mtt *mtt, int start_index, + u64 *buffer_list, int list_len) +{ + u64 __iomem *mtts; + int i; + + mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * dev->limits.mtt_seg_size + + start_index * sizeof (u64); + for (i = 0; i < list_len; ++i) + mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT), + mtts + i); +} + +static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, + struct mthca_mtt *mtt, int start_index, + u64 *buffer_list, int list_len) +{ + __be64 *mtts; + dma_addr_t dma_handle; + int i; + int s = start_index * sizeof (u64); + + /* For Arbel, all MTTs must fit in the same page. */ + BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE); + /* Require full segments */ + BUG_ON(s % dev->limits.mtt_seg_size); + + mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg + + s / dev->limits.mtt_seg_size, &dma_handle); + + BUG_ON(!mtts); + + dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle, + list_len * sizeof (u64), DMA_TO_DEVICE); + + for (i = 0; i < list_len; ++i) + mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT); + + dma_sync_single_for_device(&dev->pdev->dev, dma_handle, + list_len * sizeof (u64), DMA_TO_DEVICE); +} + +int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, + int start_index, u64 *buffer_list, int list_len) +{ + int size = mthca_write_mtt_size(dev); + int chunk; + + if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy || + !(dev->mthca_flags & MTHCA_FLAG_FMR)) + return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len); + + while (list_len > 0) { + chunk = min(size, list_len); + if (mthca_is_memfree(dev)) + mthca_arbel_write_mtt_seg(dev, mtt, start_index, + buffer_list, chunk); + else + mthca_tavor_write_mtt_seg(dev, mtt, start_index, + buffer_list, chunk); + + list_len -= chunk; + start_index += chunk; + buffer_list += chunk; + } + + return 0; +} + +static inline u32 tavor_hw_index_to_key(u32 ind) +{ + return ind; +} + +static inline u32 tavor_key_to_hw_index(u32 key) +{ + return key; +} + +static inline u32 arbel_hw_index_to_key(u32 ind) +{ + return (ind >> 24) | (ind << 8); +} + +static inline u32 arbel_key_to_hw_index(u32 key) +{ + return (key << 24) | (key >> 8); +} + +static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind) +{ + if (mthca_is_memfree(dev)) + return arbel_hw_index_to_key(ind); + else + return tavor_hw_index_to_key(ind); +} + +static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key) +{ + if (mthca_is_memfree(dev)) + return arbel_key_to_hw_index(key); + else + return tavor_key_to_hw_index(key); +} + +static inline u32 adjust_key(struct mthca_dev *dev, u32 key) +{ + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + return ((key << 20) & 0x800000) | (key & 0x7fffff); + else + return key; +} + +int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, + u64 iova, u64 total_size, u32 access, struct mthca_mr *mr) +{ + struct mthca_mailbox *mailbox; + struct mthca_mpt_entry *mpt_entry; + u32 key; + int i; + int err; + + WARN_ON(buffer_size_shift >= 32); + + key = mthca_alloc(&dev->mr_table.mpt_alloc); + if (key == -1) + return -ENOMEM; + key = adjust_key(dev, key); + mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); + + if (mthca_is_memfree(dev)) { + err = mthca_table_get(dev, dev->mr_table.mpt_table, key); + if (err) + goto err_out_mpt_free; + } + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_out_table; + } + mpt_entry = mailbox->buf; + + mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | + MTHCA_MPT_FLAG_MIO | + MTHCA_MPT_FLAG_REGION | + access); + if (!mr->mtt) + mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL); + + mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12); + mpt_entry->key = cpu_to_be32(key); + mpt_entry->pd = cpu_to_be32(pd); + mpt_entry->start = cpu_to_be64(iova); + mpt_entry->length = cpu_to_be64(total_size); + + memset(&mpt_entry->lkey, 0, + sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey)); + + if (mr->mtt) + mpt_entry->mtt_seg = + cpu_to_be64(dev->mr_table.mtt_base + + mr->mtt->first_seg * dev->limits.mtt_seg_size); + + if (0) { + mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); + for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { + if (i % 4 == 0) + printk("[%02x] ", i * 4); + printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i])); + if ((i + 1) % 4 == 0) + printk("\n"); + } + } + + err = mthca_SW2HW_MPT(dev, mailbox, + key & (dev->limits.num_mpts - 1)); + if (err) { + mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); + goto err_out_mailbox; + } + + mthca_free_mailbox(dev, mailbox); + return err; + +err_out_mailbox: + mthca_free_mailbox(dev, mailbox); + +err_out_table: + mthca_table_put(dev, dev->mr_table.mpt_table, key); + +err_out_mpt_free: + mthca_free(&dev->mr_table.mpt_alloc, key); + return err; +} + +int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, + u32 access, struct mthca_mr *mr) +{ + mr->mtt = NULL; + return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr); +} + +int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, + u64 *buffer_list, int buffer_size_shift, + int list_len, u64 iova, u64 total_size, + u32 access, struct mthca_mr *mr) +{ + int err; + + mr->mtt = mthca_alloc_mtt(dev, list_len); + if (IS_ERR(mr->mtt)) + return PTR_ERR(mr->mtt); + + err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len); + if (err) { + mthca_free_mtt(dev, mr->mtt); + return err; + } + + err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova, + total_size, access, mr); + if (err) + mthca_free_mtt(dev, mr->mtt); + + return err; +} + +/* Free mr or fmr */ +static void mthca_free_region(struct mthca_dev *dev, u32 lkey) +{ + mthca_table_put(dev, dev->mr_table.mpt_table, + key_to_hw_index(dev, lkey)); + + mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey)); +} + +void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr) +{ + int err; + + err = mthca_HW2SW_MPT(dev, NULL, + key_to_hw_index(dev, mr->ibmr.lkey) & + (dev->limits.num_mpts - 1)); + if (err) + mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err); + + mthca_free_region(dev, mr->ibmr.lkey); + mthca_free_mtt(dev, mr->mtt); +} + +int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, + u32 access, struct mthca_fmr *mr) +{ + struct mthca_mpt_entry *mpt_entry; + struct mthca_mailbox *mailbox; + u64 mtt_seg; + u32 key, idx; + int list_len = mr->attr.max_pages; + int err = -ENOMEM; + int i; + + if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32) + return -EINVAL; + + /* For Arbel, all MTTs must fit in the same page. */ + if (mthca_is_memfree(dev) && + mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE) + return -EINVAL; + + mr->maps = 0; + + key = mthca_alloc(&dev->mr_table.mpt_alloc); + if (key == -1) + return -ENOMEM; + key = adjust_key(dev, key); + + idx = key & (dev->limits.num_mpts - 1); + mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); + + if (mthca_is_memfree(dev)) { + err = mthca_table_get(dev, dev->mr_table.mpt_table, key); + if (err) + goto err_out_mpt_free; + + mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key, NULL); + BUG_ON(!mr->mem.arbel.mpt); + } else + mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base + + sizeof *(mr->mem.tavor.mpt) * idx; + + mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy); + if (IS_ERR(mr->mtt)) { + err = PTR_ERR(mr->mtt); + goto err_out_table; + } + + mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size; + + if (mthca_is_memfree(dev)) { + mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table, + mr->mtt->first_seg, + &mr->mem.arbel.dma_handle); + BUG_ON(!mr->mem.arbel.mtts); + } else + mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_out_free_mtt; + } + + mpt_entry = mailbox->buf; + + mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | + MTHCA_MPT_FLAG_MIO | + MTHCA_MPT_FLAG_REGION | + access); + + mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12); + mpt_entry->key = cpu_to_be32(key); + mpt_entry->pd = cpu_to_be32(pd); + memset(&mpt_entry->start, 0, + sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start)); + mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg); + + if (0) { + mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); + for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { + if (i % 4 == 0) + printk("[%02x] ", i * 4); + printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i])); + if ((i + 1) % 4 == 0) + printk("\n"); + } + } + + err = mthca_SW2HW_MPT(dev, mailbox, + key & (dev->limits.num_mpts - 1)); + if (err) { + mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); + goto err_out_mailbox_free; + } + + mthca_free_mailbox(dev, mailbox); + return 0; + +err_out_mailbox_free: + mthca_free_mailbox(dev, mailbox); + +err_out_free_mtt: + mthca_free_mtt(dev, mr->mtt); + +err_out_table: + mthca_table_put(dev, dev->mr_table.mpt_table, key); + +err_out_mpt_free: + mthca_free(&dev->mr_table.mpt_alloc, key); + return err; +} + +int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr) +{ + if (fmr->maps) + return -EBUSY; + + mthca_free_region(dev, fmr->ibmr.lkey); + mthca_free_mtt(dev, fmr->mtt); + + return 0; +} + +static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list, + int list_len, u64 iova) +{ + int i, page_mask; + + if (list_len > fmr->attr.max_pages) + return -EINVAL; + + page_mask = (1 << fmr->attr.page_shift) - 1; + + /* We are getting page lists, so va must be page aligned. */ + if (iova & page_mask) + return -EINVAL; + + /* Trust the user not to pass misaligned data in page_list */ + if (0) + for (i = 0; i < list_len; ++i) { + if (page_list[i] & ~page_mask) + return -EINVAL; + } + + if (fmr->maps >= fmr->attr.max_maps) + return -EINVAL; + + return 0; +} + + +int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int list_len, u64 iova) +{ + struct mthca_fmr *fmr = to_mfmr(ibfmr); + struct mthca_dev *dev = to_mdev(ibfmr->device); + struct mthca_mpt_entry mpt_entry; + u32 key; + int i, err; + + err = mthca_check_fmr(fmr, page_list, list_len, iova); + if (err) + return err; + + ++fmr->maps; + + key = tavor_key_to_hw_index(fmr->ibmr.lkey); + key += dev->limits.num_mpts; + fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key); + + writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt); + + for (i = 0; i < list_len; ++i) { + __be64 mtt_entry = cpu_to_be64(page_list[i] | + MTHCA_MTT_FLAG_PRESENT); + mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i); + } + + mpt_entry.lkey = cpu_to_be32(key); + mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift)); + mpt_entry.start = cpu_to_be64(iova); + + __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key); + memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start, + offsetof(struct mthca_mpt_entry, window_count) - + offsetof(struct mthca_mpt_entry, start)); + + writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt); + + return 0; +} + +int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int list_len, u64 iova) +{ + struct mthca_fmr *fmr = to_mfmr(ibfmr); + struct mthca_dev *dev = to_mdev(ibfmr->device); + u32 key; + int i, err; + + err = mthca_check_fmr(fmr, page_list, list_len, iova); + if (err) + return err; + + ++fmr->maps; + + key = arbel_key_to_hw_index(fmr->ibmr.lkey); + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + key += SINAI_FMR_KEY_INC; + else + key += dev->limits.num_mpts; + fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key); + + *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; + + wmb(); + + dma_sync_single_for_cpu(&dev->pdev->dev, fmr->mem.arbel.dma_handle, + list_len * sizeof(u64), DMA_TO_DEVICE); + + for (i = 0; i < list_len; ++i) + fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] | + MTHCA_MTT_FLAG_PRESENT); + + dma_sync_single_for_device(&dev->pdev->dev, fmr->mem.arbel.dma_handle, + list_len * sizeof(u64), DMA_TO_DEVICE); + + fmr->mem.arbel.mpt->key = cpu_to_be32(key); + fmr->mem.arbel.mpt->lkey = cpu_to_be32(key); + fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift)); + fmr->mem.arbel.mpt->start = cpu_to_be64(iova); + + wmb(); + + *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW; + + wmb(); + + return 0; +} + +void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) +{ + if (!fmr->maps) + return; + + fmr->maps = 0; + + writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt); +} + +void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) +{ + if (!fmr->maps) + return; + + fmr->maps = 0; + + *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; +} + +int mthca_init_mr_table(struct mthca_dev *dev) +{ + phys_addr_t addr; + int mpts, mtts, err, i; + + err = mthca_alloc_init(&dev->mr_table.mpt_alloc, + dev->limits.num_mpts, + ~0, dev->limits.reserved_mrws); + if (err) + return err; + + if (!mthca_is_memfree(dev) && + (dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) + dev->limits.fmr_reserved_mtts = 0; + else + dev->mthca_flags |= MTHCA_FLAG_FMR; + + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + mthca_dbg(dev, "Memory key throughput optimization activated.\n"); + + err = mthca_buddy_init(&dev->mr_table.mtt_buddy, + fls(dev->limits.num_mtt_segs - 1)); + + if (err) + goto err_mtt_buddy; + + dev->mr_table.tavor_fmr.mpt_base = NULL; + dev->mr_table.tavor_fmr.mtt_base = NULL; + + if (dev->limits.fmr_reserved_mtts) { + i = fls(dev->limits.fmr_reserved_mtts - 1); + + if (i >= 31) { + mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n"); + err = -EINVAL; + goto err_fmr_mpt; + } + mpts = mtts = 1 << i; + } else { + mtts = dev->limits.num_mtt_segs; + mpts = dev->limits.num_mpts; + } + + if (!mthca_is_memfree(dev) && + (dev->mthca_flags & MTHCA_FLAG_FMR)) { + + addr = pci_resource_start(dev->pdev, 4) + + ((pci_resource_len(dev->pdev, 4) - 1) & + dev->mr_table.mpt_base); + + dev->mr_table.tavor_fmr.mpt_base = + ioremap(addr, mpts * sizeof(struct mthca_mpt_entry)); + + if (!dev->mr_table.tavor_fmr.mpt_base) { + mthca_warn(dev, "MPT ioremap for FMR failed.\n"); + err = -ENOMEM; + goto err_fmr_mpt; + } + + addr = pci_resource_start(dev->pdev, 4) + + ((pci_resource_len(dev->pdev, 4) - 1) & + dev->mr_table.mtt_base); + + dev->mr_table.tavor_fmr.mtt_base = + ioremap(addr, mtts * dev->limits.mtt_seg_size); + if (!dev->mr_table.tavor_fmr.mtt_base) { + mthca_warn(dev, "MTT ioremap for FMR failed.\n"); + err = -ENOMEM; + goto err_fmr_mtt; + } + } + + if (dev->limits.fmr_reserved_mtts) { + err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, fls(mtts - 1)); + if (err) + goto err_fmr_mtt_buddy; + + /* Prevent regular MRs from using FMR keys */ + err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, fls(mtts - 1)); + if (err) + goto err_reserve_fmr; + + dev->mr_table.fmr_mtt_buddy = + &dev->mr_table.tavor_fmr.mtt_buddy; + } else + dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy; + + /* FMR table is always the first, take reserved MTTs out of there */ + if (dev->limits.reserved_mtts) { + i = fls(dev->limits.reserved_mtts - 1); + + if (mthca_alloc_mtt_range(dev, i, + dev->mr_table.fmr_mtt_buddy) == -1) { + mthca_warn(dev, "MTT table of order %d is too small.\n", + dev->mr_table.fmr_mtt_buddy->max_order); + err = -ENOMEM; + goto err_reserve_mtts; + } + } + + return 0; + +err_reserve_mtts: +err_reserve_fmr: + if (dev->limits.fmr_reserved_mtts) + mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy); + +err_fmr_mtt_buddy: + if (dev->mr_table.tavor_fmr.mtt_base) + iounmap(dev->mr_table.tavor_fmr.mtt_base); + +err_fmr_mtt: + if (dev->mr_table.tavor_fmr.mpt_base) + iounmap(dev->mr_table.tavor_fmr.mpt_base); + +err_fmr_mpt: + mthca_buddy_cleanup(&dev->mr_table.mtt_buddy); + +err_mtt_buddy: + mthca_alloc_cleanup(&dev->mr_table.mpt_alloc); + + return err; +} + +void mthca_cleanup_mr_table(struct mthca_dev *dev) +{ + /* XXX check if any MRs are still allocated? */ + if (dev->limits.fmr_reserved_mtts) + mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy); + + mthca_buddy_cleanup(&dev->mr_table.mtt_buddy); + + if (dev->mr_table.tavor_fmr.mtt_base) + iounmap(dev->mr_table.tavor_fmr.mtt_base); + if (dev->mr_table.tavor_fmr.mpt_base) + iounmap(dev->mr_table.tavor_fmr.mpt_base); + + mthca_alloc_cleanup(&dev->mr_table.mpt_alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c new file mode 100644 index 00000000..266f14e4 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_pd.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "mthca_dev.h" + +int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd) +{ + int err = 0; + + pd->privileged = privileged; + + atomic_set(&pd->sqp_count, 0); + pd->pd_num = mthca_alloc(&dev->pd_table.alloc); + if (pd->pd_num == -1) + return -ENOMEM; + + if (privileged) { + err = mthca_mr_alloc_notrans(dev, pd->pd_num, + MTHCA_MPT_FLAG_LOCAL_READ | + MTHCA_MPT_FLAG_LOCAL_WRITE, + &pd->ntmr); + if (err) + mthca_free(&dev->pd_table.alloc, pd->pd_num); + } + + return err; +} + +void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd) +{ + if (pd->privileged) + mthca_free_mr(dev, &pd->ntmr); + mthca_free(&dev->pd_table.alloc, pd->pd_num); +} + +int mthca_init_pd_table(struct mthca_dev *dev) +{ + return mthca_alloc_init(&dev->pd_table.alloc, + dev->limits.num_pds, + (1 << 24) - 1, + dev->limits.reserved_pds); +} + +void mthca_cleanup_pd_table(struct mthca_dev *dev) +{ + /* XXX check if any PDs are still allocated? */ + mthca_alloc_cleanup(&dev->pd_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c new file mode 100644 index 00000000..8edb28a9 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include + +#include "mthca_profile.h" + +enum { + MTHCA_RES_QP, + MTHCA_RES_EEC, + MTHCA_RES_SRQ, + MTHCA_RES_CQ, + MTHCA_RES_EQP, + MTHCA_RES_EEEC, + MTHCA_RES_EQ, + MTHCA_RES_RDB, + MTHCA_RES_MCG, + MTHCA_RES_MPT, + MTHCA_RES_MTT, + MTHCA_RES_UAR, + MTHCA_RES_UDAV, + MTHCA_RES_UARC, + MTHCA_RES_NUM +}; + +enum { + MTHCA_NUM_EQS = 32, + MTHCA_NUM_PDS = 1 << 15 +}; + +s64 mthca_make_profile(struct mthca_dev *dev, + struct mthca_profile *request, + struct mthca_dev_lim *dev_lim, + struct mthca_init_hca_param *init_hca) +{ + struct mthca_resource { + u64 size; + u64 start; + int type; + int num; + int log_num; + }; + + u64 mem_base, mem_avail; + s64 total_size = 0; + struct mthca_resource *profile; + struct mthca_resource tmp; + int i, j; + + profile = kzalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL); + if (!profile) + return -ENOMEM; + + profile[MTHCA_RES_QP].size = dev_lim->qpc_entry_sz; + profile[MTHCA_RES_EEC].size = dev_lim->eec_entry_sz; + profile[MTHCA_RES_SRQ].size = dev_lim->srq_entry_sz; + profile[MTHCA_RES_CQ].size = dev_lim->cqc_entry_sz; + profile[MTHCA_RES_EQP].size = dev_lim->eqpc_entry_sz; + profile[MTHCA_RES_EEEC].size = dev_lim->eeec_entry_sz; + profile[MTHCA_RES_EQ].size = dev_lim->eqc_entry_sz; + profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE; + profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE; + profile[MTHCA_RES_MPT].size = dev_lim->mpt_entry_sz; + profile[MTHCA_RES_MTT].size = dev->limits.mtt_seg_size; + profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz; + profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE; + profile[MTHCA_RES_UARC].size = request->uarc_size; + + profile[MTHCA_RES_QP].num = request->num_qp; + profile[MTHCA_RES_SRQ].num = request->num_srq; + profile[MTHCA_RES_EQP].num = request->num_qp; + profile[MTHCA_RES_RDB].num = request->num_qp * request->rdb_per_qp; + profile[MTHCA_RES_CQ].num = request->num_cq; + profile[MTHCA_RES_EQ].num = MTHCA_NUM_EQS; + profile[MTHCA_RES_MCG].num = request->num_mcg; + profile[MTHCA_RES_MPT].num = request->num_mpt; + profile[MTHCA_RES_MTT].num = request->num_mtt; + profile[MTHCA_RES_UAR].num = request->num_uar; + profile[MTHCA_RES_UARC].num = request->num_uar; + profile[MTHCA_RES_UDAV].num = request->num_udav; + + for (i = 0; i < MTHCA_RES_NUM; ++i) { + profile[i].type = i; + profile[i].log_num = max(ffs(profile[i].num) - 1, 0); + profile[i].size *= profile[i].num; + if (mthca_is_memfree(dev)) + profile[i].size = max(profile[i].size, (u64) PAGE_SIZE); + } + + if (mthca_is_memfree(dev)) { + mem_base = 0; + mem_avail = dev_lim->hca.arbel.max_icm_sz; + } else { + mem_base = dev->ddr_start; + mem_avail = dev->fw.tavor.fw_start - dev->ddr_start; + } + + /* + * Sort the resources in decreasing order of size. Since they + * all have sizes that are powers of 2, we'll be able to keep + * resources aligned to their size and pack them without gaps + * using the sorted order. + */ + for (i = MTHCA_RES_NUM; i > 0; --i) + for (j = 1; j < i; ++j) { + if (profile[j].size > profile[j - 1].size) { + tmp = profile[j]; + profile[j] = profile[j - 1]; + profile[j - 1] = tmp; + } + } + + for (i = 0; i < MTHCA_RES_NUM; ++i) { + if (profile[i].size) { + profile[i].start = mem_base + total_size; + total_size += profile[i].size; + } + if (total_size > mem_avail) { + mthca_err(dev, "Profile requires 0x%llx bytes; " + "won't fit in 0x%llx bytes of context memory.\n", + (unsigned long long) total_size, + (unsigned long long) mem_avail); + kfree(profile); + return -ENOMEM; + } + + if (profile[i].size) + mthca_dbg(dev, "profile[%2d]--%2d/%2d @ 0x%16llx " + "(size 0x%8llx)\n", + i, profile[i].type, profile[i].log_num, + (unsigned long long) profile[i].start, + (unsigned long long) profile[i].size); + } + + if (mthca_is_memfree(dev)) + mthca_dbg(dev, "HCA context memory: reserving %d KB\n", + (int) (total_size >> 10)); + else + mthca_dbg(dev, "HCA memory: allocated %d KB/%d KB (%d KB free)\n", + (int) (total_size >> 10), (int) (mem_avail >> 10), + (int) ((mem_avail - total_size) >> 10)); + + for (i = 0; i < MTHCA_RES_NUM; ++i) { + switch (profile[i].type) { + case MTHCA_RES_QP: + dev->limits.num_qps = profile[i].num; + init_hca->qpc_base = profile[i].start; + init_hca->log_num_qps = profile[i].log_num; + break; + case MTHCA_RES_EEC: + dev->limits.num_eecs = profile[i].num; + init_hca->eec_base = profile[i].start; + init_hca->log_num_eecs = profile[i].log_num; + break; + case MTHCA_RES_SRQ: + dev->limits.num_srqs = profile[i].num; + init_hca->srqc_base = profile[i].start; + init_hca->log_num_srqs = profile[i].log_num; + break; + case MTHCA_RES_CQ: + dev->limits.num_cqs = profile[i].num; + init_hca->cqc_base = profile[i].start; + init_hca->log_num_cqs = profile[i].log_num; + break; + case MTHCA_RES_EQP: + init_hca->eqpc_base = profile[i].start; + break; + case MTHCA_RES_EEEC: + init_hca->eeec_base = profile[i].start; + break; + case MTHCA_RES_EQ: + dev->limits.num_eqs = profile[i].num; + init_hca->eqc_base = profile[i].start; + init_hca->log_num_eqs = profile[i].log_num; + break; + case MTHCA_RES_RDB: + for (dev->qp_table.rdb_shift = 0; + request->num_qp << dev->qp_table.rdb_shift < profile[i].num; + ++dev->qp_table.rdb_shift) + ; /* nothing */ + dev->qp_table.rdb_base = (u32) profile[i].start; + init_hca->rdb_base = profile[i].start; + break; + case MTHCA_RES_MCG: + dev->limits.num_mgms = profile[i].num >> 1; + dev->limits.num_amgms = profile[i].num >> 1; + init_hca->mc_base = profile[i].start; + init_hca->log_mc_entry_sz = ffs(MTHCA_MGM_ENTRY_SIZE) - 1; + init_hca->log_mc_table_sz = profile[i].log_num; + init_hca->mc_hash_sz = 1 << (profile[i].log_num - 1); + break; + case MTHCA_RES_MPT: + dev->limits.num_mpts = profile[i].num; + dev->mr_table.mpt_base = profile[i].start; + init_hca->mpt_base = profile[i].start; + init_hca->log_mpt_sz = profile[i].log_num; + break; + case MTHCA_RES_MTT: + dev->limits.num_mtt_segs = profile[i].num; + dev->mr_table.mtt_base = profile[i].start; + init_hca->mtt_base = profile[i].start; + init_hca->mtt_seg_sz = ffs(dev->limits.mtt_seg_size) - 7; + break; + case MTHCA_RES_UAR: + dev->limits.num_uars = profile[i].num; + init_hca->uar_scratch_base = profile[i].start; + break; + case MTHCA_RES_UDAV: + dev->av_table.ddr_av_base = profile[i].start; + dev->av_table.num_ddr_avs = profile[i].num; + break; + case MTHCA_RES_UARC: + dev->uar_table.uarc_size = request->uarc_size; + dev->uar_table.uarc_base = profile[i].start; + init_hca->uarc_base = profile[i].start; + init_hca->log_uarc_sz = ffs(request->uarc_size) - 13; + init_hca->log_uar_sz = ffs(request->num_uar) - 1; + break; + default: + break; + } + } + + /* + * PDs don't take any HCA memory, but we assign them as part + * of the HCA profile anyway. + */ + dev->limits.num_pds = MTHCA_NUM_PDS; + + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT && + init_hca->log_mpt_sz > 23) { + mthca_warn(dev, "MPT table too large (requested size 2^%d >= 2^24)\n", + init_hca->log_mpt_sz); + mthca_warn(dev, "Disabling memory key throughput optimization.\n"); + dev->mthca_flags &= ~MTHCA_FLAG_SINAI_OPT; + } + + /* + * For Tavor, FMRs use ioremapped PCI memory. For 32 bit + * systems it may use too much vmalloc space to map all MTT + * memory, so we reserve some MTTs for FMR access, taking them + * out of the MR pool. They don't use additional memory, but + * we assign them as part of the HCA profile anyway. + */ + if (mthca_is_memfree(dev) || BITS_PER_LONG == 64) + dev->limits.fmr_reserved_mtts = 0; + else + dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts; + + kfree(profile); + return total_size; +} diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h new file mode 100644 index 00000000..62b009cc --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_profile.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MTHCA_PROFILE_H +#define MTHCA_PROFILE_H + +#include "mthca_dev.h" +#include "mthca_cmd.h" + +struct mthca_profile { + int num_qp; + int rdb_per_qp; + int num_srq; + int num_cq; + int num_mcg; + int num_mpt; + int num_mtt; + int num_udav; + int num_uar; + int uarc_size; + int fmr_reserved_mtts; +}; + +s64 mthca_make_profile(struct mthca_dev *mdev, + struct mthca_profile *request, + struct mthca_dev_lim *dev_lim, + struct mthca_init_hca_param *init_hca); + +#endif /* MTHCA_PROFILE_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c new file mode 100644 index 00000000..5b71d43b --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -0,0 +1,1378 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "mthca_dev.h" +#include "mthca_cmd.h" +#include "mthca_user.h" +#include "mthca_memfree.h" + +static void init_query_mad(struct ib_smp *mad) +{ + mad->base_version = 1; + mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->class_version = 1; + mad->method = IB_MGMT_METHOD_GET; +} + +static int mthca_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + struct mthca_dev *mdev = to_mdev(ibdev); + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + memset(props, 0, sizeof *props); + + props->fw_ver = mdev->fw_ver; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mthca_MAD_IFC(mdev, 1, 1, + 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + props->device_cap_flags = mdev->device_cap_flags; + props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & + 0xffffff; + props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); + props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); + memcpy(&props->sys_image_guid, out_mad->data + 4, 8); + + props->max_mr_size = ~0ull; + props->page_size_cap = mdev->limits.page_size_cap; + props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; + props->max_qp_wr = mdev->limits.max_wqes; + props->max_sge = mdev->limits.max_sg; + props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; + props->max_cqe = mdev->limits.max_cqes; + props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws; + props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds; + props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift; + props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma; + props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; + props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs; + props->max_srq_wr = mdev->limits.max_srq_wqes; + props->max_srq_sge = mdev->limits.max_srq_sge; + props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay; + props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? + IB_ATOMIC_HCA : IB_ATOMIC_NONE; + props->max_pkeys = mdev->limits.pkey_table_len; + props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms; + props->max_mcast_qp_attach = MTHCA_QP_PER_MGM; + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_mcast_grp; + /* + * If Sinai memory key optimization is being used, then only + * the 8-bit key portion will change. For other HCAs, the + * unused index bits will also be used for FMR remapping. + */ + if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + props->max_map_per_fmr = 255; + else + props->max_map_per_fmr = + (1 << (32 - ilog2(mdev->limits.num_mpts))) - 1; + + err = 0; + out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static int mthca_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + memset(props, 0, sizeof *props); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, + port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); + props->lmc = out_mad->data[34] & 0x7; + props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); + props->sm_sl = out_mad->data[36] & 0xf; + props->state = out_mad->data[32] & 0xf; + props->phys_state = out_mad->data[33] >> 4; + props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); + props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len; + props->max_msg_sz = 0x80000000; + props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len; + props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); + props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); + props->active_width = out_mad->data[31] & 0xf; + props->active_speed = out_mad->data[35] >> 4; + props->max_mtu = out_mad->data[41] & 0xf; + props->active_mtu = out_mad->data[36] >> 4; + props->subnet_timeout = out_mad->data[51] & 0x1f; + props->max_vl_num = out_mad->data[37] >> 4; + props->init_type_reply = out_mad->data[41] >> 4; + + out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static int mthca_modify_device(struct ib_device *ibdev, + int mask, + struct ib_device_modify *props) +{ + if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) + return -EOPNOTSUPP; + + if (mask & IB_DEVICE_MODIFY_NODE_DESC) { + if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex)) + return -ERESTARTSYS; + memcpy(ibdev->node_desc, props->node_desc, 64); + mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); + } + + return 0; +} + +static int mthca_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props) +{ + struct mthca_set_ib_param set_ib; + struct ib_port_attr attr; + int err; + + if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex)) + return -ERESTARTSYS; + + err = mthca_query_port(ibdev, port, &attr); + if (err) + goto out; + + set_ib.set_si_guid = 0; + set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR); + + set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) & + ~props->clr_port_cap_mask; + + err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port); + if (err) + goto out; +out: + mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); + return err; +} + +static int mthca_query_pkey(struct ib_device *ibdev, + u8 port, u16 index, u16 *pkey) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; + in_mad->attr_mod = cpu_to_be32(index / 32); + + err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, + port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]); + + out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static int mthca_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, + port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(gid->raw, out_mad->data + 8, 8); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; + in_mad->attr_mod = cpu_to_be32(index / 8); + + err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, + port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); + + out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct mthca_alloc_ucontext_resp uresp; + struct mthca_ucontext *context; + int err; + + if (!(to_mdev(ibdev)->active)) + return ERR_PTR(-EAGAIN); + + memset(&uresp, 0, sizeof uresp); + + uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps; + if (mthca_is_memfree(to_mdev(ibdev))) + uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size; + else + uresp.uarc_size = 0; + + context = kmalloc(sizeof *context, GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + err = mthca_uar_alloc(to_mdev(ibdev), &context->uar); + if (err) { + kfree(context); + return ERR_PTR(err); + } + + context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev)); + if (IS_ERR(context->db_tab)) { + err = PTR_ERR(context->db_tab); + mthca_uar_free(to_mdev(ibdev), &context->uar); + kfree(context); + return ERR_PTR(err); + } + + if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { + mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab); + mthca_uar_free(to_mdev(ibdev), &context->uar); + kfree(context); + return ERR_PTR(-EFAULT); + } + + context->reg_mr_warned = 0; + + return &context->ibucontext; +} + +static int mthca_dealloc_ucontext(struct ib_ucontext *context) +{ + mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab); + mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar); + kfree(to_mucontext(context)); + + return 0; +} + +static int mthca_mmap_uar(struct ib_ucontext *context, + struct vm_area_struct *vma) +{ + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (io_remap_pfn_range(vma, vma->vm_start, + to_mucontext(context)->uar.pfn, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mthca_pd *pd; + int err; + + pd = kmalloc(sizeof *pd, GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + err = mthca_pd_alloc(to_mdev(ibdev), !context, pd); + if (err) { + kfree(pd); + return ERR_PTR(err); + } + + if (context) { + if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) { + mthca_pd_free(to_mdev(ibdev), pd); + kfree(pd); + return ERR_PTR(-EFAULT); + } + } + + return &pd->ibpd; +} + +static int mthca_dealloc_pd(struct ib_pd *pd) +{ + mthca_pd_free(to_mdev(pd->device), to_mpd(pd)); + kfree(pd); + + return 0; +} + +static struct ib_ah *mthca_ah_create(struct ib_pd *pd, + struct ib_ah_attr *ah_attr) +{ + int err; + struct mthca_ah *ah; + + ah = kmalloc(sizeof *ah, GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah); + if (err) { + kfree(ah); + return ERR_PTR(err); + } + + return &ah->ibah; +} + +static int mthca_ah_destroy(struct ib_ah *ah) +{ + mthca_destroy_ah(to_mdev(ah->device), to_mah(ah)); + kfree(ah); + + return 0; +} + +static struct ib_srq *mthca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mthca_create_srq ucmd; + struct mthca_ucontext *context = NULL; + struct mthca_srq *srq; + int err; + + if (init_attr->srq_type != IB_SRQT_BASIC) + return ERR_PTR(-ENOSYS); + + srq = kmalloc(sizeof *srq, GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + if (pd->uobject) { + context = to_mucontext(pd->uobject->context); + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { + err = -EFAULT; + goto err_free; + } + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, ucmd.db_index, + ucmd.db_page); + + if (err) + goto err_free; + + srq->mr.ibmr.lkey = ucmd.lkey; + srq->db_index = ucmd.db_index; + } + + err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd), + &init_attr->attr, srq); + + if (err && pd->uobject) + mthca_unmap_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, ucmd.db_index); + + if (err) + goto err_free; + + if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) { + mthca_free_srq(to_mdev(pd->device), srq); + err = -EFAULT; + goto err_free; + } + + return &srq->ibsrq; + +err_free: + kfree(srq); + + return ERR_PTR(err); +} + +static int mthca_destroy_srq(struct ib_srq *srq) +{ + struct mthca_ucontext *context; + + if (srq->uobject) { + context = to_mucontext(srq->uobject->context); + + mthca_unmap_user_db(to_mdev(srq->device), &context->uar, + context->db_tab, to_msrq(srq)->db_index); + } + + mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); + kfree(srq); + + return 0; +} + +static struct ib_qp *mthca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mthca_create_qp ucmd; + struct mthca_qp *qp; + int err; + + if (init_attr->create_flags) + return ERR_PTR(-EINVAL); + + switch (init_attr->qp_type) { + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + { + struct mthca_ucontext *context; + + qp = kmalloc(sizeof *qp, GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + if (pd->uobject) { + context = to_mucontext(pd->uobject->context); + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { + kfree(qp); + return ERR_PTR(-EFAULT); + } + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, + ucmd.sq_db_index, ucmd.sq_db_page); + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, + ucmd.rq_db_index, ucmd.rq_db_page); + if (err) { + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, + context->db_tab, + ucmd.sq_db_index); + kfree(qp); + return ERR_PTR(err); + } + + qp->mr.ibmr.lkey = ucmd.lkey; + qp->sq.db_index = ucmd.sq_db_index; + qp->rq.db_index = ucmd.rq_db_index; + } + + err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd), + to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq), + init_attr->qp_type, init_attr->sq_sig_type, + &init_attr->cap, qp); + + if (err && pd->uobject) { + context = to_mucontext(pd->uobject->context); + + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, + context->db_tab, + ucmd.sq_db_index); + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, + context->db_tab, + ucmd.rq_db_index); + } + + qp->ibqp.qp_num = qp->qpn; + break; + } + case IB_QPT_SMI: + case IB_QPT_GSI: + { + /* Don't allow userspace to create special QPs */ + if (pd->uobject) + return ERR_PTR(-EINVAL); + + qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; + + err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd), + to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq), + init_attr->sq_sig_type, &init_attr->cap, + qp->ibqp.qp_num, init_attr->port_num, + to_msqp(qp)); + break; + } + default: + /* Don't support raw QPs */ + return ERR_PTR(-ENOSYS); + } + + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + init_attr->cap.max_send_wr = qp->sq.max; + init_attr->cap.max_recv_wr = qp->rq.max; + init_attr->cap.max_send_sge = qp->sq.max_gs; + init_attr->cap.max_recv_sge = qp->rq.max_gs; + init_attr->cap.max_inline_data = qp->max_inline_data; + + return &qp->ibqp; +} + +static int mthca_destroy_qp(struct ib_qp *qp) +{ + if (qp->uobject) { + mthca_unmap_user_db(to_mdev(qp->device), + &to_mucontext(qp->uobject->context)->uar, + to_mucontext(qp->uobject->context)->db_tab, + to_mqp(qp)->sq.db_index); + mthca_unmap_user_db(to_mdev(qp->device), + &to_mucontext(qp->uobject->context)->uar, + to_mucontext(qp->uobject->context)->db_tab, + to_mqp(qp)->rq.db_index); + } + mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); + kfree(qp); + return 0; +} + +static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, + int comp_vector, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mthca_create_cq ucmd; + struct mthca_cq *cq; + int nent; + int err; + + if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) + return ERR_PTR(-EINVAL); + + if (context) { + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return ERR_PTR(-EFAULT); + + err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, + ucmd.set_db_index, ucmd.set_db_page); + if (err) + return ERR_PTR(err); + + err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, + ucmd.arm_db_index, ucmd.arm_db_page); + if (err) + goto err_unmap_set; + } + + cq = kmalloc(sizeof *cq, GFP_KERNEL); + if (!cq) { + err = -ENOMEM; + goto err_unmap_arm; + } + + if (context) { + cq->buf.mr.ibmr.lkey = ucmd.lkey; + cq->set_ci_db_index = ucmd.set_db_index; + cq->arm_db_index = ucmd.arm_db_index; + } + + for (nent = 1; nent <= entries; nent <<= 1) + ; /* nothing */ + + err = mthca_init_cq(to_mdev(ibdev), nent, + context ? to_mucontext(context) : NULL, + context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num, + cq); + if (err) + goto err_free; + + if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) { + mthca_free_cq(to_mdev(ibdev), cq); + goto err_free; + } + + cq->resize_buf = NULL; + + return &cq->ibcq; + +err_free: + kfree(cq); + +err_unmap_arm: + if (context) + mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, ucmd.arm_db_index); + +err_unmap_set: + if (context) + mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, ucmd.set_db_index); + + return ERR_PTR(err); +} + +static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq, + int entries) +{ + int ret; + + spin_lock_irq(&cq->lock); + if (cq->resize_buf) { + ret = -EBUSY; + goto unlock; + } + + cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); + if (!cq->resize_buf) { + ret = -ENOMEM; + goto unlock; + } + + cq->resize_buf->state = CQ_RESIZE_ALLOC; + + ret = 0; + +unlock: + spin_unlock_irq(&cq->lock); + + if (ret) + return ret; + + ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries); + if (ret) { + spin_lock_irq(&cq->lock); + kfree(cq->resize_buf); + cq->resize_buf = NULL; + spin_unlock_irq(&cq->lock); + return ret; + } + + cq->resize_buf->cqe = entries - 1; + + spin_lock_irq(&cq->lock); + cq->resize_buf->state = CQ_RESIZE_READY; + spin_unlock_irq(&cq->lock); + + return 0; +} + +static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) +{ + struct mthca_dev *dev = to_mdev(ibcq->device); + struct mthca_cq *cq = to_mcq(ibcq); + struct mthca_resize_cq ucmd; + u32 lkey; + int ret; + + if (entries < 1 || entries > dev->limits.max_cqes) + return -EINVAL; + + mutex_lock(&cq->mutex); + + entries = roundup_pow_of_two(entries + 1); + if (entries == ibcq->cqe + 1) { + ret = 0; + goto out; + } + + if (cq->is_kernel) { + ret = mthca_alloc_resize_buf(dev, cq, entries); + if (ret) + goto out; + lkey = cq->resize_buf->buf.mr.ibmr.lkey; + } else { + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { + ret = -EFAULT; + goto out; + } + lkey = ucmd.lkey; + } + + ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries)); + + if (ret) { + if (cq->resize_buf) { + mthca_free_cq_buf(dev, &cq->resize_buf->buf, + cq->resize_buf->cqe); + kfree(cq->resize_buf); + spin_lock_irq(&cq->lock); + cq->resize_buf = NULL; + spin_unlock_irq(&cq->lock); + } + goto out; + } + + if (cq->is_kernel) { + struct mthca_cq_buf tbuf; + int tcqe; + + spin_lock_irq(&cq->lock); + if (cq->resize_buf->state == CQ_RESIZE_READY) { + mthca_cq_resize_copy_cqes(cq); + tbuf = cq->buf; + tcqe = cq->ibcq.cqe; + cq->buf = cq->resize_buf->buf; + cq->ibcq.cqe = cq->resize_buf->cqe; + } else { + tbuf = cq->resize_buf->buf; + tcqe = cq->resize_buf->cqe; + } + + kfree(cq->resize_buf); + cq->resize_buf = NULL; + spin_unlock_irq(&cq->lock); + + mthca_free_cq_buf(dev, &tbuf, tcqe); + } else + ibcq->cqe = entries - 1; + +out: + mutex_unlock(&cq->mutex); + + return ret; +} + +static int mthca_destroy_cq(struct ib_cq *cq) +{ + if (cq->uobject) { + mthca_unmap_user_db(to_mdev(cq->device), + &to_mucontext(cq->uobject->context)->uar, + to_mucontext(cq->uobject->context)->db_tab, + to_mcq(cq)->arm_db_index); + mthca_unmap_user_db(to_mdev(cq->device), + &to_mucontext(cq->uobject->context)->uar, + to_mucontext(cq->uobject->context)->db_tab, + to_mcq(cq)->set_ci_db_index); + } + mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); + kfree(cq); + + return 0; +} + +static inline u32 convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? MTHCA_MPT_FLAG_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0) | + MTHCA_MPT_FLAG_LOCAL_READ; +} + +static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct mthca_mr *mr; + int err; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + err = mthca_mr_alloc_notrans(to_mdev(pd->device), + to_mpd(pd)->pd_num, + convert_access(acc), mr); + + if (err) { + kfree(mr); + return ERR_PTR(err); + } + + mr->umem = NULL; + + return &mr->ibmr; +} + +static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, + int acc, + u64 *iova_start) +{ + struct mthca_mr *mr; + u64 *page_list; + u64 total_size; + unsigned long mask; + int shift; + int npages; + int err; + int i, j, n; + + mask = buffer_list[0].addr ^ *iova_start; + total_size = 0; + for (i = 0; i < num_phys_buf; ++i) { + if (i != 0) + mask |= buffer_list[i].addr; + if (i != num_phys_buf - 1) + mask |= buffer_list[i].addr + buffer_list[i].size; + + total_size += buffer_list[i].size; + } + + if (mask & ~PAGE_MASK) + return ERR_PTR(-EINVAL); + + shift = __ffs(mask | 1 << 31); + + buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1); + buffer_list[0].addr &= ~0ull << shift; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + npages = 0; + for (i = 0; i < num_phys_buf; ++i) + npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift; + + if (!npages) + return &mr->ibmr; + + page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL); + if (!page_list) { + kfree(mr); + return ERR_PTR(-ENOMEM); + } + + n = 0; + for (i = 0; i < num_phys_buf; ++i) + for (j = 0; + j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift; + ++j) + page_list[n++] = buffer_list[i].addr + ((u64) j << shift); + + mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) " + "in PD %x; shift %d, npages %d.\n", + (unsigned long long) buffer_list[0].addr, + (unsigned long long) *iova_start, + to_mpd(pd)->pd_num, + shift, npages); + + err = mthca_mr_alloc_phys(to_mdev(pd->device), + to_mpd(pd)->pd_num, + page_list, shift, npages, + *iova_start, total_size, + convert_access(acc), mr); + + if (err) { + kfree(page_list); + kfree(mr); + return ERR_PTR(err); + } + + kfree(page_list); + mr->umem = NULL; + + return &mr->ibmr; +} + +static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int acc, struct ib_udata *udata) +{ + struct mthca_dev *dev = to_mdev(pd->device); + struct ib_umem_chunk *chunk; + struct mthca_mr *mr; + struct mthca_reg_mr ucmd; + u64 *pages; + int shift, n, len; + int i, j, k; + int err = 0; + int write_mtt_size; + + if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd) { + if (!to_mucontext(pd->uobject->context)->reg_mr_warned) { + mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n", + current->comm); + mthca_warn(dev, " Update libmthca to fix this.\n"); + } + ++to_mucontext(pd->uobject->context)->reg_mr_warned; + ucmd.mr_attrs = 0; + } else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return ERR_PTR(-EFAULT); + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, + ucmd.mr_attrs & MTHCA_MR_DMASYNC); + + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + goto err; + } + + shift = ffs(mr->umem->page_size) - 1; + + n = 0; + list_for_each_entry(chunk, &mr->umem->chunk_list, list) + n += chunk->nents; + + mr->mtt = mthca_alloc_mtt(dev, n); + if (IS_ERR(mr->mtt)) { + err = PTR_ERR(mr->mtt); + goto err_umem; + } + + pages = (u64 *) __get_free_page(GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + goto err_mtt; + } + + i = n = 0; + + write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); + + list_for_each_entry(chunk, &mr->umem->chunk_list, list) + for (j = 0; j < chunk->nmap; ++j) { + len = sg_dma_len(&chunk->page_list[j]) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(&chunk->page_list[j]) + + mr->umem->page_size * k; + /* + * Be friendly to write_mtt and pass it chunks + * of appropriate size. + */ + if (i == write_mtt_size) { + err = mthca_write_mtt(dev, mr->mtt, n, pages, i); + if (err) + goto mtt_done; + n += i; + i = 0; + } + } + } + + if (i) + err = mthca_write_mtt(dev, mr->mtt, n, pages, i); +mtt_done: + free_page((unsigned long) pages); + if (err) + goto err_mtt; + + err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length, + convert_access(acc), mr); + + if (err) + goto err_mtt; + + return &mr->ibmr; + +err_mtt: + mthca_free_mtt(dev, mr->mtt); + +err_umem: + ib_umem_release(mr->umem); + +err: + kfree(mr); + return ERR_PTR(err); +} + +static int mthca_dereg_mr(struct ib_mr *mr) +{ + struct mthca_mr *mmr = to_mmr(mr); + + mthca_free_mr(to_mdev(mr->device), mmr); + if (mmr->umem) + ib_umem_release(mmr->umem); + kfree(mmr); + + return 0; +} + +static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags, + struct ib_fmr_attr *fmr_attr) +{ + struct mthca_fmr *fmr; + int err; + + fmr = kmalloc(sizeof *fmr, GFP_KERNEL); + if (!fmr) + return ERR_PTR(-ENOMEM); + + memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr); + err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num, + convert_access(mr_access_flags), fmr); + + if (err) { + kfree(fmr); + return ERR_PTR(err); + } + + return &fmr->ibmr; +} + +static int mthca_dealloc_fmr(struct ib_fmr *fmr) +{ + struct mthca_fmr *mfmr = to_mfmr(fmr); + int err; + + err = mthca_free_fmr(to_mdev(fmr->device), mfmr); + if (err) + return err; + + kfree(mfmr); + return 0; +} + +static int mthca_unmap_fmr(struct list_head *fmr_list) +{ + struct ib_fmr *fmr; + int err; + struct mthca_dev *mdev = NULL; + + list_for_each_entry(fmr, fmr_list, list) { + if (mdev && to_mdev(fmr->device) != mdev) + return -EINVAL; + mdev = to_mdev(fmr->device); + } + + if (!mdev) + return 0; + + if (mthca_is_memfree(mdev)) { + list_for_each_entry(fmr, fmr_list, list) + mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr)); + + wmb(); + } else + list_for_each_entry(fmr, fmr_list, list) + mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr)); + + err = mthca_SYNC_TPT(mdev); + return err; +} + +static ssize_t show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mthca_dev *dev = + container_of(device, struct mthca_dev, ib_dev.dev); + return sprintf(buf, "%x\n", dev->rev_id); +} + +static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mthca_dev *dev = + container_of(device, struct mthca_dev, ib_dev.dev); + return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32), + (int) (dev->fw_ver >> 16) & 0xffff, + (int) dev->fw_ver & 0xffff); +} + +static ssize_t show_hca(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mthca_dev *dev = + container_of(device, struct mthca_dev, ib_dev.dev); + switch (dev->pdev->device) { + case PCI_DEVICE_ID_MELLANOX_TAVOR: + return sprintf(buf, "MT23108\n"); + case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT: + return sprintf(buf, "MT25208 (MT23108 compat mode)\n"); + case PCI_DEVICE_ID_MELLANOX_ARBEL: + return sprintf(buf, "MT25208\n"); + case PCI_DEVICE_ID_MELLANOX_SINAI: + case PCI_DEVICE_ID_MELLANOX_SINAI_OLD: + return sprintf(buf, "MT25204\n"); + default: + return sprintf(buf, "unknown\n"); + } +} + +static ssize_t show_board(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mthca_dev *dev = + container_of(device, struct mthca_dev, ib_dev.dev); + return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); + +static struct device_attribute *mthca_dev_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_fw_ver, + &dev_attr_hca_type, + &dev_attr_board_id +}; + +static int mthca_init_node_data(struct mthca_dev *dev) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; + + err = mthca_MAD_IFC(dev, 1, 1, + 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(dev->ib_dev.node_desc, out_mad->data, 64); + + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mthca_MAD_IFC(dev, 1, 1, + 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + if (mthca_is_memfree(dev)) + dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32)); + memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +int mthca_register_device(struct mthca_dev *dev) +{ + int ret; + int i; + + ret = mthca_init_node_data(dev); + if (ret) + return ret; + + strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); + dev->ib_dev.owner = THIS_MODULE; + + dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION; + dev->ib_dev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); + dev->ib_dev.node_type = RDMA_NODE_IB_CA; + dev->ib_dev.phys_port_cnt = dev->limits.num_ports; + dev->ib_dev.num_comp_vectors = 1; + dev->ib_dev.dma_device = &dev->pdev->dev; + dev->ib_dev.query_device = mthca_query_device; + dev->ib_dev.query_port = mthca_query_port; + dev->ib_dev.modify_device = mthca_modify_device; + dev->ib_dev.modify_port = mthca_modify_port; + dev->ib_dev.query_pkey = mthca_query_pkey; + dev->ib_dev.query_gid = mthca_query_gid; + dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext; + dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext; + dev->ib_dev.mmap = mthca_mmap_uar; + dev->ib_dev.alloc_pd = mthca_alloc_pd; + dev->ib_dev.dealloc_pd = mthca_dealloc_pd; + dev->ib_dev.create_ah = mthca_ah_create; + dev->ib_dev.query_ah = mthca_ah_query; + dev->ib_dev.destroy_ah = mthca_ah_destroy; + + if (dev->mthca_flags & MTHCA_FLAG_SRQ) { + dev->ib_dev.create_srq = mthca_create_srq; + dev->ib_dev.modify_srq = mthca_modify_srq; + dev->ib_dev.query_srq = mthca_query_srq; + dev->ib_dev.destroy_srq = mthca_destroy_srq; + dev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + + if (mthca_is_memfree(dev)) + dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv; + else + dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv; + } + + dev->ib_dev.create_qp = mthca_create_qp; + dev->ib_dev.modify_qp = mthca_modify_qp; + dev->ib_dev.query_qp = mthca_query_qp; + dev->ib_dev.destroy_qp = mthca_destroy_qp; + dev->ib_dev.create_cq = mthca_create_cq; + dev->ib_dev.resize_cq = mthca_resize_cq; + dev->ib_dev.destroy_cq = mthca_destroy_cq; + dev->ib_dev.poll_cq = mthca_poll_cq; + dev->ib_dev.get_dma_mr = mthca_get_dma_mr; + dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr; + dev->ib_dev.reg_user_mr = mthca_reg_user_mr; + dev->ib_dev.dereg_mr = mthca_dereg_mr; + + if (dev->mthca_flags & MTHCA_FLAG_FMR) { + dev->ib_dev.alloc_fmr = mthca_alloc_fmr; + dev->ib_dev.unmap_fmr = mthca_unmap_fmr; + dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr; + if (mthca_is_memfree(dev)) + dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr; + else + dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr; + } + + dev->ib_dev.attach_mcast = mthca_multicast_attach; + dev->ib_dev.detach_mcast = mthca_multicast_detach; + dev->ib_dev.process_mad = mthca_process_mad; + + if (mthca_is_memfree(dev)) { + dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq; + dev->ib_dev.post_send = mthca_arbel_post_send; + dev->ib_dev.post_recv = mthca_arbel_post_receive; + } else { + dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq; + dev->ib_dev.post_send = mthca_tavor_post_send; + dev->ib_dev.post_recv = mthca_tavor_post_receive; + } + + mutex_init(&dev->cap_mask_mutex); + + ret = ib_register_device(&dev->ib_dev, NULL); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(mthca_dev_attributes); ++i) { + ret = device_create_file(&dev->ib_dev.dev, + mthca_dev_attributes[i]); + if (ret) { + ib_unregister_device(&dev->ib_dev); + return ret; + } + } + + mthca_start_catas_poll(dev); + + return 0; +} + +void mthca_unregister_device(struct mthca_dev *dev) +{ + mthca_stop_catas_poll(dev); + ib_unregister_device(&dev->ib_dev); +} diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h new file mode 100644 index 00000000..596acc45 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -0,0 +1,344 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MTHCA_PROVIDER_H +#define MTHCA_PROVIDER_H + +#include +#include + +#define MTHCA_MPT_FLAG_ATOMIC (1 << 14) +#define MTHCA_MPT_FLAG_REMOTE_WRITE (1 << 13) +#define MTHCA_MPT_FLAG_REMOTE_READ (1 << 12) +#define MTHCA_MPT_FLAG_LOCAL_WRITE (1 << 11) +#define MTHCA_MPT_FLAG_LOCAL_READ (1 << 10) + +struct mthca_buf_list { + void *buf; + DEFINE_DMA_UNMAP_ADDR(mapping); +}; + +union mthca_buf { + struct mthca_buf_list direct; + struct mthca_buf_list *page_list; +}; + +struct mthca_uar { + unsigned long pfn; + int index; +}; + +struct mthca_user_db_table; + +struct mthca_ucontext { + struct ib_ucontext ibucontext; + struct mthca_uar uar; + struct mthca_user_db_table *db_tab; + int reg_mr_warned; +}; + +struct mthca_mtt; + +struct mthca_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + struct mthca_mtt *mtt; +}; + +struct mthca_fmr { + struct ib_fmr ibmr; + struct ib_fmr_attr attr; + struct mthca_mtt *mtt; + int maps; + union { + struct { + struct mthca_mpt_entry __iomem *mpt; + u64 __iomem *mtts; + } tavor; + struct { + struct mthca_mpt_entry *mpt; + __be64 *mtts; + dma_addr_t dma_handle; + } arbel; + } mem; +}; + +struct mthca_pd { + struct ib_pd ibpd; + u32 pd_num; + atomic_t sqp_count; + struct mthca_mr ntmr; + int privileged; +}; + +struct mthca_eq { + struct mthca_dev *dev; + int eqn; + u32 eqn_mask; + u32 cons_index; + u16 msi_x_vector; + u16 msi_x_entry; + int have_irq; + int nent; + struct mthca_buf_list *page_list; + struct mthca_mr mr; + char irq_name[IB_DEVICE_NAME_MAX]; +}; + +struct mthca_av; + +enum mthca_ah_type { + MTHCA_AH_ON_HCA, + MTHCA_AH_PCI_POOL, + MTHCA_AH_KMALLOC +}; + +struct mthca_ah { + struct ib_ah ibah; + enum mthca_ah_type type; + u32 key; + struct mthca_av *av; + dma_addr_t avdma; +}; + +/* + * Quick description of our CQ/QP locking scheme: + * + * We have one global lock that protects dev->cq/qp_table. Each + * struct mthca_cq/qp also has its own lock. An individual qp lock + * may be taken inside of an individual cq lock. Both cqs attached to + * a qp may be locked, with the cq with the lower cqn locked first. + * No other nesting should be done. + * + * Each struct mthca_cq/qp also has an ref count, protected by the + * corresponding table lock. The pointer from the cq/qp_table to the + * struct counts as one reference. This reference also is good for + * access through the consumer API, so modifying the CQ/QP etc doesn't + * need to take another reference. Access to a QP because of a + * completion being polled does not need a reference either. + * + * Finally, each struct mthca_cq/qp has a wait_queue_head_t for the + * destroy function to sleep on. + * + * This means that access from the consumer API requires nothing but + * taking the struct's lock. + * + * Access because of a completion event should go as follows: + * - lock cq/qp_table and look up struct + * - increment ref count in struct + * - drop cq/qp_table lock + * - lock struct, do your thing, and unlock struct + * - decrement ref count; if zero, wake up waiters + * + * To destroy a CQ/QP, we can do the following: + * - lock cq/qp_table + * - remove pointer and decrement ref count + * - unlock cq/qp_table lock + * - wait_event until ref count is zero + * + * It is the consumer's responsibilty to make sure that no QP + * operations (WQE posting or state modification) are pending when a + * QP is destroyed. Also, the consumer must make sure that calls to + * qp_modify are serialized. Similarly, the consumer is responsible + * for ensuring that no CQ resize operations are pending when a CQ + * is destroyed. + * + * Possible optimizations (wait for profile data to see if/where we + * have locks bouncing between CPUs): + * - split cq/qp table lock into n separate (cache-aligned) locks, + * indexed (say) by the page in the table + * - split QP struct lock into three (one for common info, one for the + * send queue and one for the receive queue) + */ + +struct mthca_cq_buf { + union mthca_buf queue; + struct mthca_mr mr; + int is_direct; +}; + +struct mthca_cq_resize { + struct mthca_cq_buf buf; + int cqe; + enum { + CQ_RESIZE_ALLOC, + CQ_RESIZE_READY, + CQ_RESIZE_SWAPPED + } state; +}; + +struct mthca_cq { + struct ib_cq ibcq; + spinlock_t lock; + int refcount; + int cqn; + u32 cons_index; + struct mthca_cq_buf buf; + struct mthca_cq_resize *resize_buf; + int is_kernel; + + /* Next fields are Arbel only */ + int set_ci_db_index; + __be32 *set_ci_db; + int arm_db_index; + __be32 *arm_db; + int arm_sn; + + wait_queue_head_t wait; + struct mutex mutex; +}; + +struct mthca_srq { + struct ib_srq ibsrq; + spinlock_t lock; + int refcount; + int srqn; + int max; + int max_gs; + int wqe_shift; + int first_free; + int last_free; + u16 counter; /* Arbel only */ + int db_index; /* Arbel only */ + __be32 *db; /* Arbel only */ + void *last; + + int is_direct; + u64 *wrid; + union mthca_buf queue; + struct mthca_mr mr; + + wait_queue_head_t wait; + struct mutex mutex; +}; + +struct mthca_wq { + spinlock_t lock; + int max; + unsigned next_ind; + unsigned last_comp; + unsigned head; + unsigned tail; + void *last; + int max_gs; + int wqe_shift; + + int db_index; /* Arbel only */ + __be32 *db; +}; + +struct mthca_qp { + struct ib_qp ibqp; + int refcount; + u32 qpn; + int is_direct; + u8 port; /* for SQP and memfree use only */ + u8 alt_port; /* for memfree use only */ + u8 transport; + u8 state; + u8 atomic_rd_en; + u8 resp_depth; + + struct mthca_mr mr; + + struct mthca_wq rq; + struct mthca_wq sq; + enum ib_sig_type sq_policy; + int send_wqe_offset; + int max_inline_data; + + u64 *wrid; + union mthca_buf queue; + + wait_queue_head_t wait; + struct mutex mutex; +}; + +struct mthca_sqp { + struct mthca_qp qp; + int pkey_index; + u32 qkey; + u32 send_psn; + struct ib_ud_header ud_header; + int header_buf_size; + void *header_buf; + dma_addr_t header_dma; +}; + +static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct mthca_ucontext, ibucontext); +} + +static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr) +{ + return container_of(ibmr, struct mthca_fmr, ibmr); +} + +static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct mthca_mr, ibmr); +} + +static inline struct mthca_pd *to_mpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct mthca_pd, ibpd); +} + +static inline struct mthca_ah *to_mah(struct ib_ah *ibah) +{ + return container_of(ibah, struct mthca_ah, ibah); +} + +static inline struct mthca_cq *to_mcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct mthca_cq, ibcq); +} + +static inline struct mthca_srq *to_msrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct mthca_srq, ibsrq); +} + +static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct mthca_qp, ibqp); +} + +static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp) +{ + return container_of(qp, struct mthca_sqp, qp); +} + +#endif /* MTHCA_PROVIDER_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c new file mode 100644 index 00000000..9601049e --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -0,0 +1,2308 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include + +#include +#include +#include + +#include "mthca_dev.h" +#include "mthca_cmd.h" +#include "mthca_memfree.h" +#include "mthca_wqe.h" + +enum { + MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE, + MTHCA_ACK_REQ_FREQ = 10, + MTHCA_FLIGHT_LIMIT = 9, + MTHCA_UD_HEADER_SIZE = 72, /* largest UD header possible */ + MTHCA_INLINE_HEADER_SIZE = 4, /* data segment overhead for inline */ + MTHCA_INLINE_CHUNK_SIZE = 16 /* inline data segment chunk */ +}; + +enum { + MTHCA_QP_STATE_RST = 0, + MTHCA_QP_STATE_INIT = 1, + MTHCA_QP_STATE_RTR = 2, + MTHCA_QP_STATE_RTS = 3, + MTHCA_QP_STATE_SQE = 4, + MTHCA_QP_STATE_SQD = 5, + MTHCA_QP_STATE_ERR = 6, + MTHCA_QP_STATE_DRAINING = 7 +}; + +enum { + MTHCA_QP_ST_RC = 0x0, + MTHCA_QP_ST_UC = 0x1, + MTHCA_QP_ST_RD = 0x2, + MTHCA_QP_ST_UD = 0x3, + MTHCA_QP_ST_MLX = 0x7 +}; + +enum { + MTHCA_QP_PM_MIGRATED = 0x3, + MTHCA_QP_PM_ARMED = 0x0, + MTHCA_QP_PM_REARM = 0x1 +}; + +enum { + /* qp_context flags */ + MTHCA_QP_BIT_DE = 1 << 8, + /* params1 */ + MTHCA_QP_BIT_SRE = 1 << 15, + MTHCA_QP_BIT_SWE = 1 << 14, + MTHCA_QP_BIT_SAE = 1 << 13, + MTHCA_QP_BIT_SIC = 1 << 4, + MTHCA_QP_BIT_SSC = 1 << 3, + /* params2 */ + MTHCA_QP_BIT_RRE = 1 << 15, + MTHCA_QP_BIT_RWE = 1 << 14, + MTHCA_QP_BIT_RAE = 1 << 13, + MTHCA_QP_BIT_RIC = 1 << 4, + MTHCA_QP_BIT_RSC = 1 << 3 +}; + +enum { + MTHCA_SEND_DOORBELL_FENCE = 1 << 5 +}; + +struct mthca_qp_path { + __be32 port_pkey; + u8 rnr_retry; + u8 g_mylmc; + __be16 rlid; + u8 ackto; + u8 mgid_index; + u8 static_rate; + u8 hop_limit; + __be32 sl_tclass_flowlabel; + u8 rgid[16]; +} __attribute__((packed)); + +struct mthca_qp_context { + __be32 flags; + __be32 tavor_sched_queue; /* Reserved on Arbel */ + u8 mtu_msgmax; + u8 rq_size_stride; /* Reserved on Tavor */ + u8 sq_size_stride; /* Reserved on Tavor */ + u8 rlkey_arbel_sched_queue; /* Reserved on Tavor */ + __be32 usr_page; + __be32 local_qpn; + __be32 remote_qpn; + u32 reserved1[2]; + struct mthca_qp_path pri_path; + struct mthca_qp_path alt_path; + __be32 rdd; + __be32 pd; + __be32 wqe_base; + __be32 wqe_lkey; + __be32 params1; + __be32 reserved2; + __be32 next_send_psn; + __be32 cqn_snd; + __be32 snd_wqe_base_l; /* Next send WQE on Tavor */ + __be32 snd_db_index; /* (debugging only entries) */ + __be32 last_acked_psn; + __be32 ssn; + __be32 params2; + __be32 rnr_nextrecvpsn; + __be32 ra_buff_indx; + __be32 cqn_rcv; + __be32 rcv_wqe_base_l; /* Next recv WQE on Tavor */ + __be32 rcv_db_index; /* (debugging only entries) */ + __be32 qkey; + __be32 srqn; + __be32 rmsn; + __be16 rq_wqe_counter; /* reserved on Tavor */ + __be16 sq_wqe_counter; /* reserved on Tavor */ + u32 reserved3[18]; +} __attribute__((packed)); + +struct mthca_qp_param { + __be32 opt_param_mask; + u32 reserved1; + struct mthca_qp_context context; + u32 reserved2[62]; +} __attribute__((packed)); + +enum { + MTHCA_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, + MTHCA_QP_OPTPAR_RRE = 1 << 1, + MTHCA_QP_OPTPAR_RAE = 1 << 2, + MTHCA_QP_OPTPAR_RWE = 1 << 3, + MTHCA_QP_OPTPAR_PKEY_INDEX = 1 << 4, + MTHCA_QP_OPTPAR_Q_KEY = 1 << 5, + MTHCA_QP_OPTPAR_RNR_TIMEOUT = 1 << 6, + MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7, + MTHCA_QP_OPTPAR_SRA_MAX = 1 << 8, + MTHCA_QP_OPTPAR_RRA_MAX = 1 << 9, + MTHCA_QP_OPTPAR_PM_STATE = 1 << 10, + MTHCA_QP_OPTPAR_PORT_NUM = 1 << 11, + MTHCA_QP_OPTPAR_RETRY_COUNT = 1 << 12, + MTHCA_QP_OPTPAR_ALT_RNR_RETRY = 1 << 13, + MTHCA_QP_OPTPAR_ACK_TIMEOUT = 1 << 14, + MTHCA_QP_OPTPAR_RNR_RETRY = 1 << 15, + MTHCA_QP_OPTPAR_SCHED_QUEUE = 1 << 16 +}; + +static const u8 mthca_opcode[] = { + [IB_WR_SEND] = MTHCA_OPCODE_SEND, + [IB_WR_SEND_WITH_IMM] = MTHCA_OPCODE_SEND_IMM, + [IB_WR_RDMA_WRITE] = MTHCA_OPCODE_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = MTHCA_OPCODE_RDMA_WRITE_IMM, + [IB_WR_RDMA_READ] = MTHCA_OPCODE_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = MTHCA_OPCODE_ATOMIC_CS, + [IB_WR_ATOMIC_FETCH_AND_ADD] = MTHCA_OPCODE_ATOMIC_FA, +}; + +static int is_sqp(struct mthca_dev *dev, struct mthca_qp *qp) +{ + return qp->qpn >= dev->qp_table.sqp_start && + qp->qpn <= dev->qp_table.sqp_start + 3; +} + +static int is_qp0(struct mthca_dev *dev, struct mthca_qp *qp) +{ + return qp->qpn >= dev->qp_table.sqp_start && + qp->qpn <= dev->qp_table.sqp_start + 1; +} + +static void *get_recv_wqe(struct mthca_qp *qp, int n) +{ + if (qp->is_direct) + return qp->queue.direct.buf + (n << qp->rq.wqe_shift); + else + return qp->queue.page_list[(n << qp->rq.wqe_shift) >> PAGE_SHIFT].buf + + ((n << qp->rq.wqe_shift) & (PAGE_SIZE - 1)); +} + +static void *get_send_wqe(struct mthca_qp *qp, int n) +{ + if (qp->is_direct) + return qp->queue.direct.buf + qp->send_wqe_offset + + (n << qp->sq.wqe_shift); + else + return qp->queue.page_list[(qp->send_wqe_offset + + (n << qp->sq.wqe_shift)) >> + PAGE_SHIFT].buf + + ((qp->send_wqe_offset + (n << qp->sq.wqe_shift)) & + (PAGE_SIZE - 1)); +} + +static void mthca_wq_reset(struct mthca_wq *wq) +{ + wq->next_ind = 0; + wq->last_comp = wq->max - 1; + wq->head = 0; + wq->tail = 0; +} + +void mthca_qp_event(struct mthca_dev *dev, u32 qpn, + enum ib_event_type event_type) +{ + struct mthca_qp *qp; + struct ib_event event; + + spin_lock(&dev->qp_table.lock); + qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1)); + if (qp) + ++qp->refcount; + spin_unlock(&dev->qp_table.lock); + + if (!qp) { + mthca_warn(dev, "Async event for bogus QP %08x\n", qpn); + return; + } + + if (event_type == IB_EVENT_PATH_MIG) + qp->port = qp->alt_port; + + event.device = &dev->ib_dev; + event.event = event_type; + event.element.qp = &qp->ibqp; + if (qp->ibqp.event_handler) + qp->ibqp.event_handler(&event, qp->ibqp.qp_context); + + spin_lock(&dev->qp_table.lock); + if (!--qp->refcount) + wake_up(&qp->wait); + spin_unlock(&dev->qp_table.lock); +} + +static int to_mthca_state(enum ib_qp_state ib_state) +{ + switch (ib_state) { + case IB_QPS_RESET: return MTHCA_QP_STATE_RST; + case IB_QPS_INIT: return MTHCA_QP_STATE_INIT; + case IB_QPS_RTR: return MTHCA_QP_STATE_RTR; + case IB_QPS_RTS: return MTHCA_QP_STATE_RTS; + case IB_QPS_SQD: return MTHCA_QP_STATE_SQD; + case IB_QPS_SQE: return MTHCA_QP_STATE_SQE; + case IB_QPS_ERR: return MTHCA_QP_STATE_ERR; + default: return -1; + } +} + +enum { RC, UC, UD, RD, RDEE, MLX, NUM_TRANS }; + +static int to_mthca_st(int transport) +{ + switch (transport) { + case RC: return MTHCA_QP_ST_RC; + case UC: return MTHCA_QP_ST_UC; + case UD: return MTHCA_QP_ST_UD; + case RD: return MTHCA_QP_ST_RD; + case MLX: return MTHCA_QP_ST_MLX; + default: return -1; + } +} + +static void store_attrs(struct mthca_sqp *sqp, const struct ib_qp_attr *attr, + int attr_mask) +{ + if (attr_mask & IB_QP_PKEY_INDEX) + sqp->pkey_index = attr->pkey_index; + if (attr_mask & IB_QP_QKEY) + sqp->qkey = attr->qkey; + if (attr_mask & IB_QP_SQ_PSN) + sqp->send_psn = attr->sq_psn; +} + +static void init_port(struct mthca_dev *dev, int port) +{ + int err; + struct mthca_init_ib_param param; + + memset(¶m, 0, sizeof param); + + param.port_width = dev->limits.port_width_cap; + param.vl_cap = dev->limits.vl_cap; + param.mtu_cap = dev->limits.mtu_cap; + param.gid_cap = dev->limits.gid_table_len; + param.pkey_cap = dev->limits.pkey_table_len; + + err = mthca_INIT_IB(dev, ¶m, port); + if (err) + mthca_warn(dev, "INIT_IB failed, return code %d.\n", err); +} + +static __be32 get_hw_access_flags(struct mthca_qp *qp, const struct ib_qp_attr *attr, + int attr_mask) +{ + u8 dest_rd_atomic; + u32 access_flags; + u32 hw_access_flags = 0; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + dest_rd_atomic = attr->max_dest_rd_atomic; + else + dest_rd_atomic = qp->resp_depth; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + access_flags = attr->qp_access_flags; + else + access_flags = qp->atomic_rd_en; + + if (!dest_rd_atomic) + access_flags &= IB_ACCESS_REMOTE_WRITE; + + if (access_flags & IB_ACCESS_REMOTE_READ) + hw_access_flags |= MTHCA_QP_BIT_RRE; + if (access_flags & IB_ACCESS_REMOTE_ATOMIC) + hw_access_flags |= MTHCA_QP_BIT_RAE; + if (access_flags & IB_ACCESS_REMOTE_WRITE) + hw_access_flags |= MTHCA_QP_BIT_RWE; + + return cpu_to_be32(hw_access_flags); +} + +static inline enum ib_qp_state to_ib_qp_state(int mthca_state) +{ + switch (mthca_state) { + case MTHCA_QP_STATE_RST: return IB_QPS_RESET; + case MTHCA_QP_STATE_INIT: return IB_QPS_INIT; + case MTHCA_QP_STATE_RTR: return IB_QPS_RTR; + case MTHCA_QP_STATE_RTS: return IB_QPS_RTS; + case MTHCA_QP_STATE_DRAINING: + case MTHCA_QP_STATE_SQD: return IB_QPS_SQD; + case MTHCA_QP_STATE_SQE: return IB_QPS_SQE; + case MTHCA_QP_STATE_ERR: return IB_QPS_ERR; + default: return -1; + } +} + +static inline enum ib_mig_state to_ib_mig_state(int mthca_mig_state) +{ + switch (mthca_mig_state) { + case 0: return IB_MIG_ARMED; + case 1: return IB_MIG_REARM; + case 3: return IB_MIG_MIGRATED; + default: return -1; + } +} + +static int to_ib_qp_access_flags(int mthca_flags) +{ + int ib_flags = 0; + + if (mthca_flags & MTHCA_QP_BIT_RRE) + ib_flags |= IB_ACCESS_REMOTE_READ; + if (mthca_flags & MTHCA_QP_BIT_RWE) + ib_flags |= IB_ACCESS_REMOTE_WRITE; + if (mthca_flags & MTHCA_QP_BIT_RAE) + ib_flags |= IB_ACCESS_REMOTE_ATOMIC; + + return ib_flags; +} + +static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr, + struct mthca_qp_path *path) +{ + memset(ib_ah_attr, 0, sizeof *ib_ah_attr); + ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3; + + if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports) + return; + + ib_ah_attr->dlid = be16_to_cpu(path->rlid); + ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28; + ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f; + ib_ah_attr->static_rate = mthca_rate_to_ib(dev, + path->static_rate & 0xf, + ib_ah_attr->port_num); + ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0; + if (ib_ah_attr->ah_flags) { + ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1); + ib_ah_attr->grh.hop_limit = path->hop_limit; + ib_ah_attr->grh.traffic_class = + (be32_to_cpu(path->sl_tclass_flowlabel) >> 20) & 0xff; + ib_ah_attr->grh.flow_label = + be32_to_cpu(path->sl_tclass_flowlabel) & 0xfffff; + memcpy(ib_ah_attr->grh.dgid.raw, + path->rgid, sizeof ib_ah_attr->grh.dgid.raw); + } +} + +int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_qp *qp = to_mqp(ibqp); + int err = 0; + struct mthca_mailbox *mailbox = NULL; + struct mthca_qp_param *qp_param; + struct mthca_qp_context *context; + int mthca_state; + + mutex_lock(&qp->mutex); + + if (qp->state == IB_QPS_RESET) { + qp_attr->qp_state = IB_QPS_RESET; + goto done; + } + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto out; + } + + err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox); + if (err) { + mthca_warn(dev, "QUERY_QP failed (%d)\n", err); + goto out_mailbox; + } + + qp_param = mailbox->buf; + context = &qp_param->context; + mthca_state = be32_to_cpu(context->flags) >> 28; + + qp->state = to_ib_qp_state(mthca_state); + qp_attr->qp_state = qp->state; + qp_attr->path_mtu = context->mtu_msgmax >> 5; + qp_attr->path_mig_state = + to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3); + qp_attr->qkey = be32_to_cpu(context->qkey); + qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff; + qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff; + qp_attr->dest_qp_num = be32_to_cpu(context->remote_qpn) & 0xffffff; + qp_attr->qp_access_flags = + to_ib_qp_access_flags(be32_to_cpu(context->params2)); + + if (qp->transport == RC || qp->transport == UC) { + to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); + to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); + qp_attr->alt_pkey_index = + be32_to_cpu(context->alt_path.port_pkey) & 0x7f; + qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; + } + + qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f; + qp_attr->port_num = + (be32_to_cpu(context->pri_path.port_pkey) >> 24) & 0x3; + + /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ + qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING; + + qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7); + + qp_attr->max_dest_rd_atomic = + 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7); + qp_attr->min_rnr_timer = + (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f; + qp_attr->timeout = context->pri_path.ackto >> 3; + qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7; + qp_attr->rnr_retry = context->pri_path.rnr_retry >> 5; + qp_attr->alt_timeout = context->alt_path.ackto >> 3; + +done: + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_send_wr = qp->sq.max; + qp_attr->cap.max_recv_wr = qp->rq.max; + qp_attr->cap.max_send_sge = qp->sq.max_gs; + qp_attr->cap.max_recv_sge = qp->rq.max_gs; + qp_attr->cap.max_inline_data = qp->max_inline_data; + + qp_init_attr->cap = qp_attr->cap; + +out_mailbox: + mthca_free_mailbox(dev, mailbox); + +out: + mutex_unlock(&qp->mutex); + return err; +} + +static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah, + struct mthca_qp_path *path, u8 port) +{ + path->g_mylmc = ah->src_path_bits & 0x7f; + path->rlid = cpu_to_be16(ah->dlid); + path->static_rate = mthca_get_rate(dev, ah->static_rate, port); + + if (ah->ah_flags & IB_AH_GRH) { + if (ah->grh.sgid_index >= dev->limits.gid_table_len) { + mthca_dbg(dev, "sgid_index (%u) too large. max is %d\n", + ah->grh.sgid_index, dev->limits.gid_table_len-1); + return -1; + } + + path->g_mylmc |= 1 << 7; + path->mgid_index = ah->grh.sgid_index; + path->hop_limit = ah->grh.hop_limit; + path->sl_tclass_flowlabel = + cpu_to_be32((ah->sl << 28) | + (ah->grh.traffic_class << 20) | + (ah->grh.flow_label)); + memcpy(path->rgid, ah->grh.dgid.raw, 16); + } else + path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28); + + return 0; +} + +static int __mthca_modify_qp(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, int attr_mask, + enum ib_qp_state cur_state, enum ib_qp_state new_state) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_qp *qp = to_mqp(ibqp); + struct mthca_mailbox *mailbox; + struct mthca_qp_param *qp_param; + struct mthca_qp_context *qp_context; + u32 sqd_event = 0; + int err = -EINVAL; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto out; + } + qp_param = mailbox->buf; + qp_context = &qp_param->context; + memset(qp_param, 0, sizeof *qp_param); + + qp_context->flags = cpu_to_be32((to_mthca_state(new_state) << 28) | + (to_mthca_st(qp->transport) << 16)); + qp_context->flags |= cpu_to_be32(MTHCA_QP_BIT_DE); + if (!(attr_mask & IB_QP_PATH_MIG_STATE)) + qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11); + else { + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PM_STATE); + switch (attr->path_mig_state) { + case IB_MIG_MIGRATED: + qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11); + break; + case IB_MIG_REARM: + qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_REARM << 11); + break; + case IB_MIG_ARMED: + qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_ARMED << 11); + break; + } + } + + /* leave tavor_sched_queue as 0 */ + + if (qp->transport == MLX || qp->transport == UD) + qp_context->mtu_msgmax = (IB_MTU_2048 << 5) | 11; + else if (attr_mask & IB_QP_PATH_MTU) { + if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_2048) { + mthca_dbg(dev, "path MTU (%u) is invalid\n", + attr->path_mtu); + goto out_mailbox; + } + qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31; + } + + if (mthca_is_memfree(dev)) { + if (qp->rq.max) + qp_context->rq_size_stride = ilog2(qp->rq.max) << 3; + qp_context->rq_size_stride |= qp->rq.wqe_shift - 4; + + if (qp->sq.max) + qp_context->sq_size_stride = ilog2(qp->sq.max) << 3; + qp_context->sq_size_stride |= qp->sq.wqe_shift - 4; + } + + /* leave arbel_sched_queue as 0 */ + + if (qp->ibqp.uobject) + qp_context->usr_page = + cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index); + else + qp_context->usr_page = cpu_to_be32(dev->driver_uar.index); + qp_context->local_qpn = cpu_to_be32(qp->qpn); + if (attr_mask & IB_QP_DEST_QPN) { + qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num); + } + + if (qp->transport == MLX) + qp_context->pri_path.port_pkey |= + cpu_to_be32(qp->port << 24); + else { + if (attr_mask & IB_QP_PORT) { + qp_context->pri_path.port_pkey |= + cpu_to_be32(attr->port_num << 24); + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PORT_NUM); + } + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + qp_context->pri_path.port_pkey |= + cpu_to_be32(attr->pkey_index); + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PKEY_INDEX); + } + + if (attr_mask & IB_QP_RNR_RETRY) { + qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry = + attr->rnr_retry << 5; + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY | + MTHCA_QP_OPTPAR_ALT_RNR_RETRY); + } + + if (attr_mask & IB_QP_AV) { + if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path, + attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) + goto out_mailbox; + + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); + } + + if (ibqp->qp_type == IB_QPT_RC && + cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { + u8 sched_queue = ibqp->uobject ? 0x2 : 0x1; + + if (mthca_is_memfree(dev)) + qp_context->rlkey_arbel_sched_queue |= sched_queue; + else + qp_context->tavor_sched_queue |= cpu_to_be32(sched_queue); + + qp_param->opt_param_mask |= + cpu_to_be32(MTHCA_QP_OPTPAR_SCHED_QUEUE); + } + + if (attr_mask & IB_QP_TIMEOUT) { + qp_context->pri_path.ackto = attr->timeout << 3; + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT); + } + + if (attr_mask & IB_QP_ALT_PATH) { + if (attr->alt_pkey_index >= dev->limits.pkey_table_len) { + mthca_dbg(dev, "Alternate P_Key index (%u) too large. max is %d\n", + attr->alt_pkey_index, dev->limits.pkey_table_len-1); + goto out_mailbox; + } + + if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) { + mthca_dbg(dev, "Alternate port number (%u) is invalid\n", + attr->alt_port_num); + goto out_mailbox; + } + + if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path, + attr->alt_ah_attr.port_num)) + goto out_mailbox; + + qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index | + attr->alt_port_num << 24); + qp_context->alt_path.ackto = attr->alt_timeout << 3; + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH); + } + + /* leave rdd as 0 */ + qp_context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pd_num); + /* leave wqe_base as 0 (we always create an MR based at 0 for WQs) */ + qp_context->wqe_lkey = cpu_to_be32(qp->mr.ibmr.lkey); + qp_context->params1 = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) | + (MTHCA_FLIGHT_LIMIT << 24) | + MTHCA_QP_BIT_SWE); + if (qp->sq_policy == IB_SIGNAL_ALL_WR) + qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC); + if (attr_mask & IB_QP_RETRY_CNT) { + qp_context->params1 |= cpu_to_be32(attr->retry_cnt << 16); + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT); + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + if (attr->max_rd_atomic) { + qp_context->params1 |= + cpu_to_be32(MTHCA_QP_BIT_SRE | + MTHCA_QP_BIT_SAE); + qp_context->params1 |= + cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); + } + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX); + } + + if (attr_mask & IB_QP_SQ_PSN) + qp_context->next_send_psn = cpu_to_be32(attr->sq_psn); + qp_context->cqn_snd = cpu_to_be32(to_mcq(ibqp->send_cq)->cqn); + + if (mthca_is_memfree(dev)) { + qp_context->snd_wqe_base_l = cpu_to_be32(qp->send_wqe_offset); + qp_context->snd_db_index = cpu_to_be32(qp->sq.db_index); + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + if (attr->max_dest_rd_atomic) + qp_context->params2 |= + cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); + + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX); + } + + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { + qp_context->params2 |= get_hw_access_flags(qp, attr, attr_mask); + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE | + MTHCA_QP_OPTPAR_RRE | + MTHCA_QP_OPTPAR_RAE); + } + + qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC); + + if (ibqp->srq) + qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RIC); + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT); + } + if (attr_mask & IB_QP_RQ_PSN) + qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); + + qp_context->ra_buff_indx = + cpu_to_be32(dev->qp_table.rdb_base + + ((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE << + dev->qp_table.rdb_shift)); + + qp_context->cqn_rcv = cpu_to_be32(to_mcq(ibqp->recv_cq)->cqn); + + if (mthca_is_memfree(dev)) + qp_context->rcv_db_index = cpu_to_be32(qp->rq.db_index); + + if (attr_mask & IB_QP_QKEY) { + qp_context->qkey = cpu_to_be32(attr->qkey); + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY); + } + + if (ibqp->srq) + qp_context->srqn = cpu_to_be32(1 << 24 | + to_msrq(ibqp->srq)->srqn); + + if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && + attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && + attr->en_sqd_async_notify) + sqd_event = 1 << 31; + + err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0, + mailbox, sqd_event); + if (err) { + mthca_warn(dev, "modify QP %d->%d returned %d.\n", + cur_state, new_state, err); + goto out_mailbox; + } + + qp->state = new_state; + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->atomic_rd_en = attr->qp_access_flags; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->resp_depth = attr->max_dest_rd_atomic; + if (attr_mask & IB_QP_PORT) + qp->port = attr->port_num; + if (attr_mask & IB_QP_ALT_PATH) + qp->alt_port = attr->alt_port_num; + + if (is_sqp(dev, qp)) + store_attrs(to_msqp(qp), attr, attr_mask); + + /* + * If we moved QP0 to RTR, bring the IB link up; if we moved + * QP0 to RESET or ERROR, bring the link back down. + */ + if (is_qp0(dev, qp)) { + if (cur_state != IB_QPS_RTR && + new_state == IB_QPS_RTR) + init_port(dev, qp->port); + + if (cur_state != IB_QPS_RESET && + cur_state != IB_QPS_ERR && + (new_state == IB_QPS_RESET || + new_state == IB_QPS_ERR)) + mthca_CLOSE_IB(dev, qp->port); + } + + /* + * If we moved a kernel QP to RESET, clean up all old CQ + * entries and reinitialize the QP. + */ + if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) { + mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + if (qp->ibqp.send_cq != qp->ibqp.recv_cq) + mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, NULL); + + mthca_wq_reset(&qp->sq); + qp->sq.last = get_send_wqe(qp, qp->sq.max - 1); + + mthca_wq_reset(&qp->rq); + qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1); + + if (mthca_is_memfree(dev)) { + *qp->sq.db = 0; + *qp->rq.db = 0; + } + } + +out_mailbox: + mthca_free_mailbox(dev, mailbox); +out: + return err; +} + +int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_qp *qp = to_mqp(ibqp); + enum ib_qp_state cur_state, new_state; + int err = -EINVAL; + + mutex_lock(&qp->mutex); + if (attr_mask & IB_QP_CUR_STATE) { + cur_state = attr->cur_qp_state; + } else { + spin_lock_irq(&qp->sq.lock); + spin_lock(&qp->rq.lock); + cur_state = qp->state; + spin_unlock(&qp->rq.lock); + spin_unlock_irq(&qp->sq.lock); + } + + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) { + mthca_dbg(dev, "Bad QP transition (transport %d) " + "%d->%d with attr 0x%08x\n", + qp->transport, cur_state, new_state, + attr_mask); + goto out; + } + + if ((attr_mask & IB_QP_PKEY_INDEX) && + attr->pkey_index >= dev->limits.pkey_table_len) { + mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n", + attr->pkey_index, dev->limits.pkey_table_len-1); + goto out; + } + + if ((attr_mask & IB_QP_PORT) && + (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) { + mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num); + goto out; + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > dev->limits.max_qp_init_rdma) { + mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n", + attr->max_rd_atomic, dev->limits.max_qp_init_rdma); + goto out; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) { + mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n", + attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift); + goto out; + } + + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + err = 0; + goto out; + } + + err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + +out: + mutex_unlock(&qp->mutex); + return err; +} + +static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp, int desc_sz) +{ + /* + * Calculate the maximum size of WQE s/g segments, excluding + * the next segment and other non-data segments. + */ + int max_data_size = desc_sz - sizeof (struct mthca_next_seg); + + switch (qp->transport) { + case MLX: + max_data_size -= 2 * sizeof (struct mthca_data_seg); + break; + + case UD: + if (mthca_is_memfree(dev)) + max_data_size -= sizeof (struct mthca_arbel_ud_seg); + else + max_data_size -= sizeof (struct mthca_tavor_ud_seg); + break; + + default: + max_data_size -= sizeof (struct mthca_raddr_seg); + break; + } + + return max_data_size; +} + +static inline int mthca_max_inline_data(struct mthca_pd *pd, int max_data_size) +{ + /* We don't support inline data for kernel QPs (yet). */ + return pd->ibpd.uobject ? max_data_size - MTHCA_INLINE_HEADER_SIZE : 0; +} + +static void mthca_adjust_qp_caps(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_qp *qp) +{ + int max_data_size = mthca_max_data_size(dev, qp, + min(dev->limits.max_desc_sz, + 1 << qp->sq.wqe_shift)); + + qp->max_inline_data = mthca_max_inline_data(pd, max_data_size); + + qp->sq.max_gs = min_t(int, dev->limits.max_sg, + max_data_size / sizeof (struct mthca_data_seg)); + qp->rq.max_gs = min_t(int, dev->limits.max_sg, + (min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) - + sizeof (struct mthca_next_seg)) / + sizeof (struct mthca_data_seg)); +} + +/* + * Allocate and register buffer for WQEs. qp->rq.max, sq.max, + * rq.max_gs and sq.max_gs must all be assigned. + * mthca_alloc_wqe_buf will calculate rq.wqe_shift and + * sq.wqe_shift (as well as send_wqe_offset, is_direct, and + * queue) + */ +static int mthca_alloc_wqe_buf(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_qp *qp) +{ + int size; + int err = -ENOMEM; + + size = sizeof (struct mthca_next_seg) + + qp->rq.max_gs * sizeof (struct mthca_data_seg); + + if (size > dev->limits.max_desc_sz) + return -EINVAL; + + for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size; + qp->rq.wqe_shift++) + ; /* nothing */ + + size = qp->sq.max_gs * sizeof (struct mthca_data_seg); + switch (qp->transport) { + case MLX: + size += 2 * sizeof (struct mthca_data_seg); + break; + + case UD: + size += mthca_is_memfree(dev) ? + sizeof (struct mthca_arbel_ud_seg) : + sizeof (struct mthca_tavor_ud_seg); + break; + + case UC: + size += sizeof (struct mthca_raddr_seg); + break; + + case RC: + size += sizeof (struct mthca_raddr_seg); + /* + * An atomic op will require an atomic segment, a + * remote address segment and one scatter entry. + */ + size = max_t(int, size, + sizeof (struct mthca_atomic_seg) + + sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_data_seg)); + break; + + default: + break; + } + + /* Make sure that we have enough space for a bind request */ + size = max_t(int, size, sizeof (struct mthca_bind_seg)); + + size += sizeof (struct mthca_next_seg); + + if (size > dev->limits.max_desc_sz) + return -EINVAL; + + for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size; + qp->sq.wqe_shift++) + ; /* nothing */ + + qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift, + 1 << qp->sq.wqe_shift); + + /* + * If this is a userspace QP, we don't actually have to + * allocate anything. All we need is to calculate the WQE + * sizes and the send_wqe_offset, so we're done now. + */ + if (pd->ibpd.uobject) + return 0; + + size = PAGE_ALIGN(qp->send_wqe_offset + + (qp->sq.max << qp->sq.wqe_shift)); + + qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64), + GFP_KERNEL); + if (!qp->wrid) + goto err_out; + + err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_QP_SIZE, + &qp->queue, &qp->is_direct, pd, 0, &qp->mr); + if (err) + goto err_out; + + return 0; + +err_out: + kfree(qp->wrid); + return err; +} + +static void mthca_free_wqe_buf(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + mthca_buf_free(dev, PAGE_ALIGN(qp->send_wqe_offset + + (qp->sq.max << qp->sq.wqe_shift)), + &qp->queue, qp->is_direct, &qp->mr); + kfree(qp->wrid); +} + +static int mthca_map_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + int ret; + + if (mthca_is_memfree(dev)) { + ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn); + if (ret) + return ret; + + ret = mthca_table_get(dev, dev->qp_table.eqp_table, qp->qpn); + if (ret) + goto err_qpc; + + ret = mthca_table_get(dev, dev->qp_table.rdb_table, + qp->qpn << dev->qp_table.rdb_shift); + if (ret) + goto err_eqpc; + + } + + return 0; + +err_eqpc: + mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); + +err_qpc: + mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn); + + return ret; +} + +static void mthca_unmap_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + mthca_table_put(dev, dev->qp_table.rdb_table, + qp->qpn << dev->qp_table.rdb_shift); + mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); + mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn); +} + +static int mthca_alloc_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + if (mthca_is_memfree(dev)) { + qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ, + qp->qpn, &qp->rq.db); + if (qp->rq.db_index < 0) + return -ENOMEM; + + qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ, + qp->qpn, &qp->sq.db); + if (qp->sq.db_index < 0) { + mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); + return -ENOMEM; + } + } + + return 0; +} + +static void mthca_free_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + if (mthca_is_memfree(dev)) { + mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index); + mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); + } +} + +static int mthca_alloc_qp_common(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_cq *send_cq, + struct mthca_cq *recv_cq, + enum ib_sig_type send_policy, + struct mthca_qp *qp) +{ + int ret; + int i; + struct mthca_next_seg *next; + + qp->refcount = 1; + init_waitqueue_head(&qp->wait); + mutex_init(&qp->mutex); + qp->state = IB_QPS_RESET; + qp->atomic_rd_en = 0; + qp->resp_depth = 0; + qp->sq_policy = send_policy; + mthca_wq_reset(&qp->sq); + mthca_wq_reset(&qp->rq); + + spin_lock_init(&qp->sq.lock); + spin_lock_init(&qp->rq.lock); + + ret = mthca_map_memfree(dev, qp); + if (ret) + return ret; + + ret = mthca_alloc_wqe_buf(dev, pd, qp); + if (ret) { + mthca_unmap_memfree(dev, qp); + return ret; + } + + mthca_adjust_qp_caps(dev, pd, qp); + + /* + * If this is a userspace QP, we're done now. The doorbells + * will be allocated and buffers will be initialized in + * userspace. + */ + if (pd->ibpd.uobject) + return 0; + + ret = mthca_alloc_memfree(dev, qp); + if (ret) { + mthca_free_wqe_buf(dev, qp); + mthca_unmap_memfree(dev, qp); + return ret; + } + + if (mthca_is_memfree(dev)) { + struct mthca_data_seg *scatter; + int size = (sizeof (struct mthca_next_seg) + + qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16; + + for (i = 0; i < qp->rq.max; ++i) { + next = get_recv_wqe(qp, i); + next->nda_op = cpu_to_be32(((i + 1) & (qp->rq.max - 1)) << + qp->rq.wqe_shift); + next->ee_nds = cpu_to_be32(size); + + for (scatter = (void *) (next + 1); + (void *) scatter < (void *) next + (1 << qp->rq.wqe_shift); + ++scatter) + scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + } + + for (i = 0; i < qp->sq.max; ++i) { + next = get_send_wqe(qp, i); + next->nda_op = cpu_to_be32((((i + 1) & (qp->sq.max - 1)) << + qp->sq.wqe_shift) + + qp->send_wqe_offset); + } + } else { + for (i = 0; i < qp->rq.max; ++i) { + next = get_recv_wqe(qp, i); + next->nda_op = htonl((((i + 1) % qp->rq.max) << + qp->rq.wqe_shift) | 1); + } + + } + + qp->sq.last = get_send_wqe(qp, qp->sq.max - 1); + qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1); + + return 0; +} + +static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, + struct mthca_pd *pd, struct mthca_qp *qp) +{ + int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz); + + /* Sanity check QP size before proceeding */ + if (cap->max_send_wr > dev->limits.max_wqes || + cap->max_recv_wr > dev->limits.max_wqes || + cap->max_send_sge > dev->limits.max_sg || + cap->max_recv_sge > dev->limits.max_sg || + cap->max_inline_data > mthca_max_inline_data(pd, max_data_size)) + return -EINVAL; + + /* + * For MLX transport we need 2 extra send gather entries: + * one for the header and one for the checksum at the end + */ + if (qp->transport == MLX && cap->max_send_sge + 2 > dev->limits.max_sg) + return -EINVAL; + + if (mthca_is_memfree(dev)) { + qp->rq.max = cap->max_recv_wr ? + roundup_pow_of_two(cap->max_recv_wr) : 0; + qp->sq.max = cap->max_send_wr ? + roundup_pow_of_two(cap->max_send_wr) : 0; + } else { + qp->rq.max = cap->max_recv_wr; + qp->sq.max = cap->max_send_wr; + } + + qp->rq.max_gs = cap->max_recv_sge; + qp->sq.max_gs = max_t(int, cap->max_send_sge, + ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE, + MTHCA_INLINE_CHUNK_SIZE) / + sizeof (struct mthca_data_seg)); + + return 0; +} + +int mthca_alloc_qp(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_cq *send_cq, + struct mthca_cq *recv_cq, + enum ib_qp_type type, + enum ib_sig_type send_policy, + struct ib_qp_cap *cap, + struct mthca_qp *qp) +{ + int err; + + switch (type) { + case IB_QPT_RC: qp->transport = RC; break; + case IB_QPT_UC: qp->transport = UC; break; + case IB_QPT_UD: qp->transport = UD; break; + default: return -EINVAL; + } + + err = mthca_set_qp_size(dev, cap, pd, qp); + if (err) + return err; + + qp->qpn = mthca_alloc(&dev->qp_table.alloc); + if (qp->qpn == -1) + return -ENOMEM; + + /* initialize port to zero for error-catching. */ + qp->port = 0; + + err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, + send_policy, qp); + if (err) { + mthca_free(&dev->qp_table.alloc, qp->qpn); + return err; + } + + spin_lock_irq(&dev->qp_table.lock); + mthca_array_set(&dev->qp_table.qp, + qp->qpn & (dev->limits.num_qps - 1), qp); + spin_unlock_irq(&dev->qp_table.lock); + + return 0; +} + +static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq) + __acquires(&send_cq->lock) __acquires(&recv_cq->lock) +{ + if (send_cq == recv_cq) { + spin_lock_irq(&send_cq->lock); + __acquire(&recv_cq->lock); + } else if (send_cq->cqn < recv_cq->cqn) { + spin_lock_irq(&send_cq->lock); + spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock_irq(&recv_cq->lock); + spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); + } +} + +static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq) + __releases(&send_cq->lock) __releases(&recv_cq->lock) +{ + if (send_cq == recv_cq) { + __release(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else if (send_cq->cqn < recv_cq->cqn) { + spin_unlock(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else { + spin_unlock(&send_cq->lock); + spin_unlock_irq(&recv_cq->lock); + } +} + +int mthca_alloc_sqp(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_cq *send_cq, + struct mthca_cq *recv_cq, + enum ib_sig_type send_policy, + struct ib_qp_cap *cap, + int qpn, + int port, + struct mthca_sqp *sqp) +{ + u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; + int err; + + sqp->qp.transport = MLX; + err = mthca_set_qp_size(dev, cap, pd, &sqp->qp); + if (err) + return err; + + sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE; + sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size, + &sqp->header_dma, GFP_KERNEL); + if (!sqp->header_buf) + return -ENOMEM; + + spin_lock_irq(&dev->qp_table.lock); + if (mthca_array_get(&dev->qp_table.qp, mqpn)) + err = -EBUSY; + else + mthca_array_set(&dev->qp_table.qp, mqpn, sqp); + spin_unlock_irq(&dev->qp_table.lock); + + if (err) + goto err_out; + + sqp->qp.port = port; + sqp->qp.qpn = mqpn; + sqp->qp.transport = MLX; + + err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, + send_policy, &sqp->qp); + if (err) + goto err_out_free; + + atomic_inc(&pd->sqp_count); + + return 0; + + err_out_free: + /* + * Lock CQs here, so that CQ polling code can do QP lookup + * without taking a lock. + */ + mthca_lock_cqs(send_cq, recv_cq); + + spin_lock(&dev->qp_table.lock); + mthca_array_clear(&dev->qp_table.qp, mqpn); + spin_unlock(&dev->qp_table.lock); + + mthca_unlock_cqs(send_cq, recv_cq); + + err_out: + dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size, + sqp->header_buf, sqp->header_dma); + + return err; +} + +static inline int get_qp_refcount(struct mthca_dev *dev, struct mthca_qp *qp) +{ + int c; + + spin_lock_irq(&dev->qp_table.lock); + c = qp->refcount; + spin_unlock_irq(&dev->qp_table.lock); + + return c; +} + +void mthca_free_qp(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + struct mthca_cq *send_cq; + struct mthca_cq *recv_cq; + + send_cq = to_mcq(qp->ibqp.send_cq); + recv_cq = to_mcq(qp->ibqp.recv_cq); + + /* + * Lock CQs here, so that CQ polling code can do QP lookup + * without taking a lock. + */ + mthca_lock_cqs(send_cq, recv_cq); + + spin_lock(&dev->qp_table.lock); + mthca_array_clear(&dev->qp_table.qp, + qp->qpn & (dev->limits.num_qps - 1)); + --qp->refcount; + spin_unlock(&dev->qp_table.lock); + + mthca_unlock_cqs(send_cq, recv_cq); + + wait_event(qp->wait, !get_qp_refcount(dev, qp)); + + if (qp->state != IB_QPS_RESET) + mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0, + NULL, 0); + + /* + * If this is a userspace QP, the buffers, MR, CQs and so on + * will be cleaned up in userspace, so all we have to do is + * unref the mem-free tables and free the QPN in our table. + */ + if (!qp->ibqp.uobject) { + mthca_cq_clean(dev, recv_cq, qp->qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + if (send_cq != recv_cq) + mthca_cq_clean(dev, send_cq, qp->qpn, NULL); + + mthca_free_memfree(dev, qp); + mthca_free_wqe_buf(dev, qp); + } + + mthca_unmap_memfree(dev, qp); + + if (is_sqp(dev, qp)) { + atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count)); + dma_free_coherent(&dev->pdev->dev, + to_msqp(qp)->header_buf_size, + to_msqp(qp)->header_buf, + to_msqp(qp)->header_dma); + } else + mthca_free(&dev->qp_table.alloc, qp->qpn); +} + +/* Create UD header for an MLX send and build a data segment for it */ +static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, + int ind, struct ib_send_wr *wr, + struct mthca_mlx_seg *mlx, + struct mthca_data_seg *data) +{ + int header_size; + int err; + u16 pkey; + + ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0, + mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0, + &sqp->ud_header); + + err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header); + if (err) + return err; + mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1); + mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) | + (sqp->ud_header.lrh.destination_lid == + IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) | + (sqp->ud_header.lrh.service_level << 8)); + mlx->rlid = sqp->ud_header.lrh.destination_lid; + mlx->vcrc = 0; + + switch (wr->opcode) { + case IB_WR_SEND: + sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; + sqp->ud_header.immediate_present = 0; + break; + case IB_WR_SEND_WITH_IMM: + sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; + sqp->ud_header.immediate_present = 1; + sqp->ud_header.immediate_data = wr->ex.imm_data; + break; + default: + return -EINVAL; + } + + sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; + if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) + sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; + sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); + if (!sqp->qp.ibqp.qp_num) + ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, + sqp->pkey_index, &pkey); + else + ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, + wr->wr.ud.pkey_index, &pkey); + sqp->ud_header.bth.pkey = cpu_to_be16(pkey); + sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); + sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? + sqp->qkey : wr->wr.ud.remote_qkey); + sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); + + header_size = ib_ud_header_pack(&sqp->ud_header, + sqp->header_buf + + ind * MTHCA_UD_HEADER_SIZE); + + data->byte_count = cpu_to_be32(header_size); + data->lkey = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey); + data->addr = cpu_to_be64(sqp->header_dma + + ind * MTHCA_UD_HEADER_SIZE); + + return 0; +} + +static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, + struct ib_cq *ib_cq) +{ + unsigned cur; + struct mthca_cq *cq; + + cur = wq->head - wq->tail; + if (likely(cur + nreq < wq->max)) + return 0; + + cq = to_mcq(ib_cq); + spin_lock(&cq->lock); + cur = wq->head - wq->tail; + spin_unlock(&cq->lock); + + return cur + nreq >= wq->max; +} + +static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg, + u64 remote_addr, u32 rkey) +{ + rseg->raddr = cpu_to_be64(remote_addr); + rseg->rkey = cpu_to_be32(rkey); + rseg->reserved = 0; +} + +static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg, + struct ib_send_wr *wr) +{ + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + } else { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = 0; + } + +} + +static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg, + struct ib_send_wr *wr) +{ + useg->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key); + useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma); + useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); + useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + +} + +static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg, + struct ib_send_wr *wr) +{ + memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE); + useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); + useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); +} + +int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_qp *qp = to_mqp(ibqp); + void *wqe; + void *prev_wqe; + unsigned long flags; + int err = 0; + int nreq; + int i; + int size; + /* + * f0 and size0 are only used if nreq != 0, and they will + * always be initialized the first time through the main loop + * before nreq is incremented. So nreq cannot become non-zero + * without initializing f0 and size0, and they are in fact + * never used uninitialized. + */ + int uninitialized_var(size0); + u32 uninitialized_var(f0); + int ind; + u8 op0 = 0; + + spin_lock_irqsave(&qp->sq.lock, flags); + + /* XXX check that state is OK to post send */ + + ind = qp->sq.next_ind; + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { + mthca_err(dev, "SQ %06x full (%u head, %u tail," + " %d max, %d nreq)\n", qp->qpn, + qp->sq.head, qp->sq.tail, + qp->sq.max, nreq); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + wqe = get_send_wqe(qp, ind); + prev_wqe = qp->sq.last; + qp->sq.last = wqe; + + ((struct mthca_next_seg *) wqe)->nda_op = 0; + ((struct mthca_next_seg *) wqe)->ee_nds = 0; + ((struct mthca_next_seg *) wqe)->flags = + ((wr->send_flags & IB_SEND_SIGNALED) ? + cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) | + ((wr->send_flags & IB_SEND_SOLICITED) ? + cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0) | + cpu_to_be32(1); + if (wr->opcode == IB_WR_SEND_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) + ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data; + + wqe += sizeof (struct mthca_next_seg); + size = sizeof (struct mthca_next_seg) / 16; + + switch (qp->transport) { + case RC: + switch (wr->opcode) { + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); + wqe += sizeof (struct mthca_raddr_seg); + + set_atomic_seg(wqe, wr); + wqe += sizeof (struct mthca_atomic_seg); + size += (sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_atomic_seg)) / 16; + break; + + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + case IB_WR_RDMA_READ: + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + + case UC: + switch (wr->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + + case UD: + set_tavor_ud_seg(wqe, wr); + wqe += sizeof (struct mthca_tavor_ud_seg); + size += sizeof (struct mthca_tavor_ud_seg) / 16; + break; + + case MLX: + err = build_mlx_header(dev, to_msqp(qp), ind, wr, + wqe - sizeof (struct mthca_next_seg), + wqe); + if (err) { + *bad_wr = wr; + goto out; + } + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + break; + } + + if (wr->num_sge > qp->sq.max_gs) { + mthca_err(dev, "too many gathers\n"); + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + for (i = 0; i < wr->num_sge; ++i) { + mthca_set_data_seg(wqe, wr->sg_list + i); + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + } + + /* Add one more inline data segment for ICRC */ + if (qp->transport == MLX) { + ((struct mthca_data_seg *) wqe)->byte_count = + cpu_to_be32((1 << 31) | 4); + ((u32 *) wqe)[1] = 0; + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + } + + qp->wrid[ind + qp->rq.max] = wr->wr_id; + + if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) { + mthca_err(dev, "opcode invalid\n"); + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cpu_to_be32(((ind << qp->sq.wqe_shift) + + qp->send_wqe_offset) | + mthca_opcode[wr->opcode]); + wmb(); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cpu_to_be32((nreq ? 0 : MTHCA_NEXT_DBD) | size | + ((wr->send_flags & IB_SEND_FENCE) ? + MTHCA_NEXT_FENCE : 0)); + + if (!nreq) { + size0 = size; + op0 = mthca_opcode[wr->opcode]; + f0 = wr->send_flags & IB_SEND_FENCE ? + MTHCA_SEND_DOORBELL_FENCE : 0; + } + + ++ind; + if (unlikely(ind >= qp->sq.max)) + ind -= qp->sq.max; + } + +out: + if (likely(nreq)) { + wmb(); + + mthca_write64(((qp->sq.next_ind << qp->sq.wqe_shift) + + qp->send_wqe_offset) | f0 | op0, + (qp->qpn << 8) | size0, + dev->kar + MTHCA_SEND_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + /* + * Make sure doorbells don't leak out of SQ spinlock + * and reach the HCA out of order: + */ + mmiowb(); + } + + qp->sq.next_ind = ind; + qp->sq.head += nreq; + + spin_unlock_irqrestore(&qp->sq.lock, flags); + return err; +} + +int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_qp *qp = to_mqp(ibqp); + unsigned long flags; + int err = 0; + int nreq; + int i; + int size; + /* + * size0 is only used if nreq != 0, and it will always be + * initialized the first time through the main loop before + * nreq is incremented. So nreq cannot become non-zero + * without initializing size0, and it is in fact never used + * uninitialized. + */ + int uninitialized_var(size0); + int ind; + void *wqe; + void *prev_wqe; + + spin_lock_irqsave(&qp->rq.lock, flags); + + /* XXX check that state is OK to post receive */ + + ind = qp->rq.next_ind; + + for (nreq = 0; wr; wr = wr->next) { + if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { + mthca_err(dev, "RQ %06x full (%u head, %u tail," + " %d max, %d nreq)\n", qp->qpn, + qp->rq.head, qp->rq.tail, + qp->rq.max, nreq); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + wqe = get_recv_wqe(qp, ind); + prev_wqe = qp->rq.last; + qp->rq.last = wqe; + + ((struct mthca_next_seg *) wqe)->ee_nds = + cpu_to_be32(MTHCA_NEXT_DBD); + ((struct mthca_next_seg *) wqe)->flags = 0; + + wqe += sizeof (struct mthca_next_seg); + size = sizeof (struct mthca_next_seg) / 16; + + if (unlikely(wr->num_sge > qp->rq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + for (i = 0; i < wr->num_sge; ++i) { + mthca_set_data_seg(wqe, wr->sg_list + i); + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + } + + qp->wrid[ind] = wr->wr_id; + + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cpu_to_be32(MTHCA_NEXT_DBD | size); + + if (!nreq) + size0 = size; + + ++ind; + if (unlikely(ind >= qp->rq.max)) + ind -= qp->rq.max; + + ++nreq; + if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { + nreq = 0; + + wmb(); + + mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0, + qp->qpn << 8, dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + + qp->rq.next_ind = ind; + qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; + } + } + +out: + if (likely(nreq)) { + wmb(); + + mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0, + qp->qpn << 8 | nreq, dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } + + qp->rq.next_ind = ind; + qp->rq.head += nreq; + + /* + * Make sure doorbells don't leak out of RQ spinlock and reach + * the HCA out of order: + */ + mmiowb(); + + spin_unlock_irqrestore(&qp->rq.lock, flags); + return err; +} + +int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_qp *qp = to_mqp(ibqp); + u32 dbhi; + void *wqe; + void *prev_wqe; + unsigned long flags; + int err = 0; + int nreq; + int i; + int size; + /* + * f0 and size0 are only used if nreq != 0, and they will + * always be initialized the first time through the main loop + * before nreq is incremented. So nreq cannot become non-zero + * without initializing f0 and size0, and they are in fact + * never used uninitialized. + */ + int uninitialized_var(size0); + u32 uninitialized_var(f0); + int ind; + u8 op0 = 0; + + spin_lock_irqsave(&qp->sq.lock, flags); + + /* XXX check that state is OK to post send */ + + ind = qp->sq.head & (qp->sq.max - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) { + nreq = 0; + + dbhi = (MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) | + ((qp->sq.head & 0xffff) << 8) | f0 | op0; + + qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff); + + /* + * Make sure doorbell record is written before we + * write MMIO send doorbell. + */ + wmb(); + + mthca_write64(dbhi, (qp->qpn << 8) | size0, + dev->kar + MTHCA_SEND_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } + + if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { + mthca_err(dev, "SQ %06x full (%u head, %u tail," + " %d max, %d nreq)\n", qp->qpn, + qp->sq.head, qp->sq.tail, + qp->sq.max, nreq); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + wqe = get_send_wqe(qp, ind); + prev_wqe = qp->sq.last; + qp->sq.last = wqe; + + ((struct mthca_next_seg *) wqe)->flags = + ((wr->send_flags & IB_SEND_SIGNALED) ? + cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) | + ((wr->send_flags & IB_SEND_SOLICITED) ? + cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0) | + ((wr->send_flags & IB_SEND_IP_CSUM) ? + cpu_to_be32(MTHCA_NEXT_IP_CSUM | MTHCA_NEXT_TCP_UDP_CSUM) : 0) | + cpu_to_be32(1); + if (wr->opcode == IB_WR_SEND_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) + ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data; + + wqe += sizeof (struct mthca_next_seg); + size = sizeof (struct mthca_next_seg) / 16; + + switch (qp->transport) { + case RC: + switch (wr->opcode) { + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); + wqe += sizeof (struct mthca_raddr_seg); + + set_atomic_seg(wqe, wr); + wqe += sizeof (struct mthca_atomic_seg); + size += (sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_atomic_seg)) / 16; + break; + + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + + case UC: + switch (wr->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + + case UD: + set_arbel_ud_seg(wqe, wr); + wqe += sizeof (struct mthca_arbel_ud_seg); + size += sizeof (struct mthca_arbel_ud_seg) / 16; + break; + + case MLX: + err = build_mlx_header(dev, to_msqp(qp), ind, wr, + wqe - sizeof (struct mthca_next_seg), + wqe); + if (err) { + *bad_wr = wr; + goto out; + } + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + break; + } + + if (wr->num_sge > qp->sq.max_gs) { + mthca_err(dev, "too many gathers\n"); + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + for (i = 0; i < wr->num_sge; ++i) { + mthca_set_data_seg(wqe, wr->sg_list + i); + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + } + + /* Add one more inline data segment for ICRC */ + if (qp->transport == MLX) { + ((struct mthca_data_seg *) wqe)->byte_count = + cpu_to_be32((1 << 31) | 4); + ((u32 *) wqe)[1] = 0; + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + } + + qp->wrid[ind + qp->rq.max] = wr->wr_id; + + if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) { + mthca_err(dev, "opcode invalid\n"); + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cpu_to_be32(((ind << qp->sq.wqe_shift) + + qp->send_wqe_offset) | + mthca_opcode[wr->opcode]); + wmb(); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cpu_to_be32(MTHCA_NEXT_DBD | size | + ((wr->send_flags & IB_SEND_FENCE) ? + MTHCA_NEXT_FENCE : 0)); + + if (!nreq) { + size0 = size; + op0 = mthca_opcode[wr->opcode]; + f0 = wr->send_flags & IB_SEND_FENCE ? + MTHCA_SEND_DOORBELL_FENCE : 0; + } + + ++ind; + if (unlikely(ind >= qp->sq.max)) + ind -= qp->sq.max; + } + +out: + if (likely(nreq)) { + dbhi = (nreq << 24) | ((qp->sq.head & 0xffff) << 8) | f0 | op0; + + qp->sq.head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff); + + /* + * Make sure doorbell record is written before we + * write MMIO send doorbell. + */ + wmb(); + + mthca_write64(dbhi, (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } + + /* + * Make sure doorbells don't leak out of SQ spinlock and reach + * the HCA out of order: + */ + mmiowb(); + + spin_unlock_irqrestore(&qp->sq.lock, flags); + return err; +} + +int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_qp *qp = to_mqp(ibqp); + unsigned long flags; + int err = 0; + int nreq; + int ind; + int i; + void *wqe; + + spin_lock_irqsave(&qp->rq.lock, flags); + + /* XXX check that state is OK to post receive */ + + ind = qp->rq.head & (qp->rq.max - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { + mthca_err(dev, "RQ %06x full (%u head, %u tail," + " %d max, %d nreq)\n", qp->qpn, + qp->rq.head, qp->rq.tail, + qp->rq.max, nreq); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + wqe = get_recv_wqe(qp, ind); + + ((struct mthca_next_seg *) wqe)->flags = 0; + + wqe += sizeof (struct mthca_next_seg); + + if (unlikely(wr->num_sge > qp->rq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + for (i = 0; i < wr->num_sge; ++i) { + mthca_set_data_seg(wqe, wr->sg_list + i); + wqe += sizeof (struct mthca_data_seg); + } + + if (i < qp->rq.max_gs) + mthca_set_data_seg_inval(wqe); + + qp->wrid[ind] = wr->wr_id; + + ++ind; + if (unlikely(ind >= qp->rq.max)) + ind -= qp->rq.max; + } +out: + if (likely(nreq)) { + qp->rq.head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + *qp->rq.db = cpu_to_be32(qp->rq.head & 0xffff); + } + + spin_unlock_irqrestore(&qp->rq.lock, flags); + return err; +} + +void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, + int index, int *dbd, __be32 *new_wqe) +{ + struct mthca_next_seg *next; + + /* + * For SRQs, all receive WQEs generate a CQE, so we're always + * at the end of the doorbell chain. + */ + if (qp->ibqp.srq && !is_send) { + *new_wqe = 0; + return; + } + + if (is_send) + next = get_send_wqe(qp, index); + else + next = get_recv_wqe(qp, index); + + *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD)); + if (next->ee_nds & cpu_to_be32(0x3f)) + *new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) | + (next->ee_nds & cpu_to_be32(0x3f)); + else + *new_wqe = 0; +} + +int mthca_init_qp_table(struct mthca_dev *dev) +{ + int err; + int i; + + spin_lock_init(&dev->qp_table.lock); + + /* + * We reserve 2 extra QPs per port for the special QPs. The + * special QP for port 1 has to be even, so round up. + */ + dev->qp_table.sqp_start = (dev->limits.reserved_qps + 1) & ~1UL; + err = mthca_alloc_init(&dev->qp_table.alloc, + dev->limits.num_qps, + (1 << 24) - 1, + dev->qp_table.sqp_start + + MTHCA_MAX_PORTS * 2); + if (err) + return err; + + err = mthca_array_init(&dev->qp_table.qp, + dev->limits.num_qps); + if (err) { + mthca_alloc_cleanup(&dev->qp_table.alloc); + return err; + } + + for (i = 0; i < 2; ++i) { + err = mthca_CONF_SPECIAL_QP(dev, i ? IB_QPT_GSI : IB_QPT_SMI, + dev->qp_table.sqp_start + i * 2); + if (err) { + mthca_warn(dev, "CONF_SPECIAL_QP returned " + "%d, aborting.\n", err); + goto err_out; + } + } + return 0; + + err_out: + for (i = 0; i < 2; ++i) + mthca_CONF_SPECIAL_QP(dev, i, 0); + + mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps); + mthca_alloc_cleanup(&dev->qp_table.alloc); + + return err; +} + +void mthca_cleanup_qp_table(struct mthca_dev *dev) +{ + int i; + + for (i = 0; i < 2; ++i) + mthca_CONF_SPECIAL_QP(dev, i, 0); + + mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps); + mthca_alloc_cleanup(&dev->qp_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c new file mode 100644 index 00000000..4fa3534e --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_reset.c @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include + +#include "mthca_dev.h" +#include "mthca_cmd.h" + +int mthca_reset(struct mthca_dev *mdev) +{ + int i; + int err = 0; + u32 *hca_header = NULL; + u32 *bridge_header = NULL; + struct pci_dev *bridge = NULL; + int bridge_pcix_cap = 0; + int hca_pcie_cap = 0; + int hca_pcix_cap = 0; + + u16 devctl; + u16 linkctl; + +#define MTHCA_RESET_OFFSET 0xf0010 +#define MTHCA_RESET_VALUE swab32(1) + + /* + * Reset the chip. This is somewhat ugly because we have to + * save off the PCI header before reset and then restore it + * after the chip reboots. We skip config space offsets 22 + * and 23 since those have a special meaning. + * + * To make matters worse, for Tavor (PCI-X HCA) we have to + * find the associated bridge device and save off its PCI + * header as well. + */ + + if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE)) { + /* Look for the bridge -- its device ID will be 2 more + than HCA's device ID. */ + while ((bridge = pci_get_device(mdev->pdev->vendor, + mdev->pdev->device + 2, + bridge)) != NULL) { + if (bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE && + bridge->subordinate == mdev->pdev->bus) { + mthca_dbg(mdev, "Found bridge: %s\n", + pci_name(bridge)); + break; + } + } + + if (!bridge) { + /* + * Didn't find a bridge for a Tavor device -- + * assume we're in no-bridge mode and hope for + * the best. + */ + mthca_warn(mdev, "No bridge found for %s\n", + pci_name(mdev->pdev)); + } + + } + + /* For Arbel do we need to save off the full 4K PCI Express header?? */ + hca_header = kmalloc(256, GFP_KERNEL); + if (!hca_header) { + err = -ENOMEM; + mthca_err(mdev, "Couldn't allocate memory to save HCA " + "PCI header, aborting.\n"); + goto out; + } + + for (i = 0; i < 64; ++i) { + if (i == 22 || i == 23) + continue; + if (pci_read_config_dword(mdev->pdev, i * 4, hca_header + i)) { + err = -ENODEV; + mthca_err(mdev, "Couldn't save HCA " + "PCI header, aborting.\n"); + goto out; + } + } + + hca_pcix_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX); + hca_pcie_cap = pci_pcie_cap(mdev->pdev); + + if (bridge) { + bridge_header = kmalloc(256, GFP_KERNEL); + if (!bridge_header) { + err = -ENOMEM; + mthca_err(mdev, "Couldn't allocate memory to save HCA " + "bridge PCI header, aborting.\n"); + goto out; + } + + for (i = 0; i < 64; ++i) { + if (i == 22 || i == 23) + continue; + if (pci_read_config_dword(bridge, i * 4, bridge_header + i)) { + err = -ENODEV; + mthca_err(mdev, "Couldn't save HCA bridge " + "PCI header, aborting.\n"); + goto out; + } + } + bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX); + if (!bridge_pcix_cap) { + err = -ENODEV; + mthca_err(mdev, "Couldn't locate HCA bridge " + "PCI-X capability, aborting.\n"); + goto out; + } + } + + /* actually hit reset */ + { + void __iomem *reset = ioremap(pci_resource_start(mdev->pdev, 0) + + MTHCA_RESET_OFFSET, 4); + + if (!reset) { + err = -ENOMEM; + mthca_err(mdev, "Couldn't map HCA reset register, " + "aborting.\n"); + goto out; + } + + writel(MTHCA_RESET_VALUE, reset); + iounmap(reset); + } + + /* Docs say to wait one second before accessing device */ + msleep(1000); + + /* Now wait for PCI device to start responding again */ + { + u32 v; + int c = 0; + + for (c = 0; c < 100; ++c) { + if (pci_read_config_dword(bridge ? bridge : mdev->pdev, 0, &v)) { + err = -ENODEV; + mthca_err(mdev, "Couldn't access HCA after reset, " + "aborting.\n"); + goto out; + } + + if (v != 0xffffffff) + goto good; + + msleep(100); + } + + err = -ENODEV; + mthca_err(mdev, "PCI device did not come back after reset, " + "aborting.\n"); + goto out; + } + +good: + /* Now restore the PCI headers */ + if (bridge) { + if (pci_write_config_dword(bridge, bridge_pcix_cap + 0x8, + bridge_header[(bridge_pcix_cap + 0x8) / 4])) { + err = -ENODEV; + mthca_err(mdev, "Couldn't restore HCA bridge Upstream " + "split transaction control, aborting.\n"); + goto out; + } + if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc, + bridge_header[(bridge_pcix_cap + 0xc) / 4])) { + err = -ENODEV; + mthca_err(mdev, "Couldn't restore HCA bridge Downstream " + "split transaction control, aborting.\n"); + goto out; + } + /* + * Bridge control register is at 0x3e, so we'll + * naturally restore it last in this loop. + */ + for (i = 0; i < 16; ++i) { + if (i * 4 == PCI_COMMAND) + continue; + + if (pci_write_config_dword(bridge, i * 4, bridge_header[i])) { + err = -ENODEV; + mthca_err(mdev, "Couldn't restore HCA bridge reg %x, " + "aborting.\n", i); + goto out; + } + } + + if (pci_write_config_dword(bridge, PCI_COMMAND, + bridge_header[PCI_COMMAND / 4])) { + err = -ENODEV; + mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, " + "aborting.\n"); + goto out; + } + } + + if (hca_pcix_cap) { + if (pci_write_config_dword(mdev->pdev, hca_pcix_cap, + hca_header[hca_pcix_cap / 4])) { + err = -ENODEV; + mthca_err(mdev, "Couldn't restore HCA PCI-X " + "command register, aborting.\n"); + goto out; + } + } + + if (hca_pcie_cap) { + devctl = hca_header[(hca_pcie_cap + PCI_EXP_DEVCTL) / 4]; + if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_DEVCTL, + devctl)) { + err = -ENODEV; + mthca_err(mdev, "Couldn't restore HCA PCI Express " + "Device Control register, aborting.\n"); + goto out; + } + linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4]; + if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_LNKCTL, + linkctl)) { + err = -ENODEV; + mthca_err(mdev, "Couldn't restore HCA PCI Express " + "Link control register, aborting.\n"); + goto out; + } + } + + for (i = 0; i < 16; ++i) { + if (i * 4 == PCI_COMMAND) + continue; + + if (pci_write_config_dword(mdev->pdev, i * 4, hca_header[i])) { + err = -ENODEV; + mthca_err(mdev, "Couldn't restore HCA reg %x, " + "aborting.\n", i); + goto out; + } + } + + if (pci_write_config_dword(mdev->pdev, PCI_COMMAND, + hca_header[PCI_COMMAND / 4])) { + err = -ENODEV; + mthca_err(mdev, "Couldn't restore HCA COMMAND, " + "aborting.\n"); + goto out; + } + +out: + if (bridge) + pci_dev_put(bridge); + kfree(bridge_header); + kfree(hca_header); + + return err; +} diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c new file mode 100644 index 00000000..d22f9704 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -0,0 +1,696 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include + +#include "mthca_dev.h" +#include "mthca_cmd.h" +#include "mthca_memfree.h" +#include "mthca_wqe.h" + +enum { + MTHCA_MAX_DIRECT_SRQ_SIZE = 4 * PAGE_SIZE +}; + +struct mthca_tavor_srq_context { + __be64 wqe_base_ds; /* low 6 bits is descriptor size */ + __be32 state_pd; + __be32 lkey; + __be32 uar; + __be16 limit_watermark; + __be16 wqe_cnt; + u32 reserved[2]; +}; + +struct mthca_arbel_srq_context { + __be32 state_logsize_srqn; + __be32 lkey; + __be32 db_index; + __be32 logstride_usrpage; + __be64 wqe_base; + __be32 eq_pd; + __be16 limit_watermark; + __be16 wqe_cnt; + u16 reserved1; + __be16 wqe_counter; + u32 reserved2[3]; +}; + +static void *get_wqe(struct mthca_srq *srq, int n) +{ + if (srq->is_direct) + return srq->queue.direct.buf + (n << srq->wqe_shift); + else + return srq->queue.page_list[(n << srq->wqe_shift) >> PAGE_SHIFT].buf + + ((n << srq->wqe_shift) & (PAGE_SIZE - 1)); +} + +/* + * Return a pointer to the location within a WQE that we're using as a + * link when the WQE is in the free list. We use the imm field + * because in the Tavor case, posting a WQE may overwrite the next + * segment of the previous WQE, but a receive WQE will never touch the + * imm field. This avoids corrupting our free list if the previous + * WQE has already completed and been put on the free list when we + * post the next WQE. + */ +static inline int *wqe_to_link(void *wqe) +{ + return (int *) (wqe + offsetof(struct mthca_next_seg, imm)); +} + +static void mthca_tavor_init_srq_context(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_srq *srq, + struct mthca_tavor_srq_context *context) +{ + memset(context, 0, sizeof *context); + + context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4)); + context->state_pd = cpu_to_be32(pd->pd_num); + context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); + + if (pd->ibpd.uobject) + context->uar = + cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); + else + context->uar = cpu_to_be32(dev->driver_uar.index); +} + +static void mthca_arbel_init_srq_context(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_srq *srq, + struct mthca_arbel_srq_context *context) +{ + int logsize, max; + + memset(context, 0, sizeof *context); + + /* + * Put max in a temporary variable to work around gcc bug + * triggered by ilog2() on sparc64. + */ + max = srq->max; + logsize = ilog2(max); + context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn); + context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); + context->db_index = cpu_to_be32(srq->db_index); + context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29); + if (pd->ibpd.uobject) + context->logstride_usrpage |= + cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); + else + context->logstride_usrpage |= cpu_to_be32(dev->driver_uar.index); + context->eq_pd = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num); +} + +static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq) +{ + mthca_buf_free(dev, srq->max << srq->wqe_shift, &srq->queue, + srq->is_direct, &srq->mr); + kfree(srq->wrid); +} + +static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, + struct mthca_srq *srq) +{ + struct mthca_data_seg *scatter; + void *wqe; + int err; + int i; + + if (pd->ibpd.uobject) + return 0; + + srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL); + if (!srq->wrid) + return -ENOMEM; + + err = mthca_buf_alloc(dev, srq->max << srq->wqe_shift, + MTHCA_MAX_DIRECT_SRQ_SIZE, + &srq->queue, &srq->is_direct, pd, 1, &srq->mr); + if (err) { + kfree(srq->wrid); + return err; + } + + /* + * Now initialize the SRQ buffer so that all of the WQEs are + * linked into the list of free WQEs. In addition, set the + * scatter list L_Keys to the sentry value of 0x100. + */ + for (i = 0; i < srq->max; ++i) { + struct mthca_next_seg *next; + + next = wqe = get_wqe(srq, i); + + if (i < srq->max - 1) { + *wqe_to_link(wqe) = i + 1; + next->nda_op = htonl(((i + 1) << srq->wqe_shift) | 1); + } else { + *wqe_to_link(wqe) = -1; + next->nda_op = 0; + } + + for (scatter = wqe + sizeof (struct mthca_next_seg); + (void *) scatter < wqe + (1 << srq->wqe_shift); + ++scatter) + scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + } + + srq->last = get_wqe(srq, srq->max - 1); + + return 0; +} + +int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, + struct ib_srq_attr *attr, struct mthca_srq *srq) +{ + struct mthca_mailbox *mailbox; + int ds; + int err; + + /* Sanity check SRQ size before proceeding */ + if (attr->max_wr > dev->limits.max_srq_wqes || + attr->max_sge > dev->limits.max_srq_sge) + return -EINVAL; + + srq->max = attr->max_wr; + srq->max_gs = attr->max_sge; + srq->counter = 0; + + if (mthca_is_memfree(dev)) + srq->max = roundup_pow_of_two(srq->max + 1); + else + srq->max = srq->max + 1; + + ds = max(64UL, + roundup_pow_of_two(sizeof (struct mthca_next_seg) + + srq->max_gs * sizeof (struct mthca_data_seg))); + + if (!mthca_is_memfree(dev) && (ds > dev->limits.max_desc_sz)) + return -EINVAL; + + srq->wqe_shift = ilog2(ds); + + srq->srqn = mthca_alloc(&dev->srq_table.alloc); + if (srq->srqn == -1) + return -ENOMEM; + + if (mthca_is_memfree(dev)) { + err = mthca_table_get(dev, dev->srq_table.table, srq->srqn); + if (err) + goto err_out; + + if (!pd->ibpd.uobject) { + srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ, + srq->srqn, &srq->db); + if (srq->db_index < 0) { + err = -ENOMEM; + goto err_out_icm; + } + } + } + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_out_db; + } + + err = mthca_alloc_srq_buf(dev, pd, srq); + if (err) + goto err_out_mailbox; + + spin_lock_init(&srq->lock); + srq->refcount = 1; + init_waitqueue_head(&srq->wait); + mutex_init(&srq->mutex); + + if (mthca_is_memfree(dev)) + mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf); + else + mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf); + + err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn); + + if (err) { + mthca_warn(dev, "SW2HW_SRQ failed (%d)\n", err); + goto err_out_free_buf; + } + + spin_lock_irq(&dev->srq_table.lock); + if (mthca_array_set(&dev->srq_table.srq, + srq->srqn & (dev->limits.num_srqs - 1), + srq)) { + spin_unlock_irq(&dev->srq_table.lock); + goto err_out_free_srq; + } + spin_unlock_irq(&dev->srq_table.lock); + + mthca_free_mailbox(dev, mailbox); + + srq->first_free = 0; + srq->last_free = srq->max - 1; + + attr->max_wr = srq->max - 1; + attr->max_sge = srq->max_gs; + + return 0; + +err_out_free_srq: + err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn); + if (err) + mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); + +err_out_free_buf: + if (!pd->ibpd.uobject) + mthca_free_srq_buf(dev, srq); + +err_out_mailbox: + mthca_free_mailbox(dev, mailbox); + +err_out_db: + if (!pd->ibpd.uobject && mthca_is_memfree(dev)) + mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); + +err_out_icm: + mthca_table_put(dev, dev->srq_table.table, srq->srqn); + +err_out: + mthca_free(&dev->srq_table.alloc, srq->srqn); + + return err; +} + +static inline int get_srq_refcount(struct mthca_dev *dev, struct mthca_srq *srq) +{ + int c; + + spin_lock_irq(&dev->srq_table.lock); + c = srq->refcount; + spin_unlock_irq(&dev->srq_table.lock); + + return c; +} + +void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) +{ + struct mthca_mailbox *mailbox; + int err; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + mthca_warn(dev, "No memory for mailbox to free SRQ.\n"); + return; + } + + err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn); + if (err) + mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); + + spin_lock_irq(&dev->srq_table.lock); + mthca_array_clear(&dev->srq_table.srq, + srq->srqn & (dev->limits.num_srqs - 1)); + --srq->refcount; + spin_unlock_irq(&dev->srq_table.lock); + + wait_event(srq->wait, !get_srq_refcount(dev, srq)); + + if (!srq->ibsrq.uobject) { + mthca_free_srq_buf(dev, srq); + if (mthca_is_memfree(dev)) + mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); + } + + mthca_table_put(dev, dev->srq_table.table, srq->srqn); + mthca_free(&dev->srq_table.alloc, srq->srqn); + mthca_free_mailbox(dev, mailbox); +} + +int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + int ret = 0; + + /* We don't support resizing SRQs (yet?) */ + if (attr_mask & IB_SRQ_MAX_WR) + return -EINVAL; + + if (attr_mask & IB_SRQ_LIMIT) { + u32 max_wr = mthca_is_memfree(dev) ? srq->max - 1 : srq->max; + if (attr->srq_limit > max_wr) + return -EINVAL; + + mutex_lock(&srq->mutex); + ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit); + mutex_unlock(&srq->mutex); + } + + return ret; +} + +int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + struct mthca_mailbox *mailbox; + struct mthca_arbel_srq_context *arbel_ctx; + struct mthca_tavor_srq_context *tavor_ctx; + int err; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox); + if (err) + goto out; + + if (mthca_is_memfree(dev)) { + arbel_ctx = mailbox->buf; + srq_attr->srq_limit = be16_to_cpu(arbel_ctx->limit_watermark); + } else { + tavor_ctx = mailbox->buf; + srq_attr->srq_limit = be16_to_cpu(tavor_ctx->limit_watermark); + } + + srq_attr->max_wr = srq->max - 1; + srq_attr->max_sge = srq->max_gs; + +out: + mthca_free_mailbox(dev, mailbox); + + return err; +} + +void mthca_srq_event(struct mthca_dev *dev, u32 srqn, + enum ib_event_type event_type) +{ + struct mthca_srq *srq; + struct ib_event event; + + spin_lock(&dev->srq_table.lock); + srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1)); + if (srq) + ++srq->refcount; + spin_unlock(&dev->srq_table.lock); + + if (!srq) { + mthca_warn(dev, "Async event for bogus SRQ %08x\n", srqn); + return; + } + + if (!srq->ibsrq.event_handler) + goto out; + + event.device = &dev->ib_dev; + event.event = event_type; + event.element.srq = &srq->ibsrq; + srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context); + +out: + spin_lock(&dev->srq_table.lock); + if (!--srq->refcount) + wake_up(&srq->wait); + spin_unlock(&dev->srq_table.lock); +} + +/* + * This function must be called with IRQs disabled. + */ +void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr) +{ + int ind; + struct mthca_next_seg *last_free; + + ind = wqe_addr >> srq->wqe_shift; + + spin_lock(&srq->lock); + + last_free = get_wqe(srq, srq->last_free); + *wqe_to_link(last_free) = ind; + last_free->nda_op = htonl((ind << srq->wqe_shift) | 1); + *wqe_to_link(get_wqe(srq, ind)) = -1; + srq->last_free = ind; + + spin_unlock(&srq->lock); +} + +int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + unsigned long flags; + int err = 0; + int first_ind; + int ind; + int next_ind; + int nreq; + int i; + void *wqe; + void *prev_wqe; + + spin_lock_irqsave(&srq->lock, flags); + + first_ind = srq->first_free; + + for (nreq = 0; wr; wr = wr->next) { + ind = srq->first_free; + wqe = get_wqe(srq, ind); + next_ind = *wqe_to_link(wqe); + + if (unlikely(next_ind < 0)) { + mthca_err(dev, "SRQ %06x full\n", srq->srqn); + err = -ENOMEM; + *bad_wr = wr; + break; + } + + prev_wqe = srq->last; + srq->last = wqe; + + ((struct mthca_next_seg *) wqe)->ee_nds = 0; + /* flags field will always remain 0 */ + + wqe += sizeof (struct mthca_next_seg); + + if (unlikely(wr->num_sge > srq->max_gs)) { + err = -EINVAL; + *bad_wr = wr; + srq->last = prev_wqe; + break; + } + + for (i = 0; i < wr->num_sge; ++i) { + mthca_set_data_seg(wqe, wr->sg_list + i); + wqe += sizeof (struct mthca_data_seg); + } + + if (i < srq->max_gs) + mthca_set_data_seg_inval(wqe); + + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cpu_to_be32(MTHCA_NEXT_DBD); + + srq->wrid[ind] = wr->wr_id; + srq->first_free = next_ind; + + ++nreq; + if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { + nreq = 0; + + /* + * Make sure that descriptors are written + * before doorbell is rung. + */ + wmb(); + + mthca_write64(first_ind << srq->wqe_shift, srq->srqn << 8, + dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + + first_ind = srq->first_free; + } + } + + if (likely(nreq)) { + /* + * Make sure that descriptors are written before + * doorbell is rung. + */ + wmb(); + + mthca_write64(first_ind << srq->wqe_shift, (srq->srqn << 8) | nreq, + dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } + + /* + * Make sure doorbells don't leak out of SRQ spinlock and + * reach the HCA out of order: + */ + mmiowb(); + + spin_unlock_irqrestore(&srq->lock, flags); + return err; +} + +int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + unsigned long flags; + int err = 0; + int ind; + int next_ind; + int nreq; + int i; + void *wqe; + + spin_lock_irqsave(&srq->lock, flags); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + ind = srq->first_free; + wqe = get_wqe(srq, ind); + next_ind = *wqe_to_link(wqe); + + if (unlikely(next_ind < 0)) { + mthca_err(dev, "SRQ %06x full\n", srq->srqn); + err = -ENOMEM; + *bad_wr = wr; + break; + } + + ((struct mthca_next_seg *) wqe)->ee_nds = 0; + /* flags field will always remain 0 */ + + wqe += sizeof (struct mthca_next_seg); + + if (unlikely(wr->num_sge > srq->max_gs)) { + err = -EINVAL; + *bad_wr = wr; + break; + } + + for (i = 0; i < wr->num_sge; ++i) { + mthca_set_data_seg(wqe, wr->sg_list + i); + wqe += sizeof (struct mthca_data_seg); + } + + if (i < srq->max_gs) + mthca_set_data_seg_inval(wqe); + + srq->wrid[ind] = wr->wr_id; + srq->first_free = next_ind; + } + + if (likely(nreq)) { + srq->counter += nreq; + + /* + * Make sure that descriptors are written before + * we write doorbell record. + */ + wmb(); + *srq->db = cpu_to_be32(srq->counter); + } + + spin_unlock_irqrestore(&srq->lock, flags); + return err; +} + +int mthca_max_srq_sge(struct mthca_dev *dev) +{ + if (mthca_is_memfree(dev)) + return dev->limits.max_sg; + + /* + * SRQ allocations are based on powers of 2 for Tavor, + * (although they only need to be multiples of 16 bytes). + * + * Therefore, we need to base the max number of sg entries on + * the largest power of 2 descriptor size that is <= to the + * actual max WQE descriptor size, rather than return the + * max_sg value given by the firmware (which is based on WQE + * sizes as multiples of 16, not powers of 2). + * + * If SRQ implementation is changed for Tavor to be based on + * multiples of 16, the calculation below can be deleted and + * the FW max_sg value returned. + */ + return min_t(int, dev->limits.max_sg, + ((1 << (fls(dev->limits.max_desc_sz) - 1)) - + sizeof (struct mthca_next_seg)) / + sizeof (struct mthca_data_seg)); +} + +int mthca_init_srq_table(struct mthca_dev *dev) +{ + int err; + + if (!(dev->mthca_flags & MTHCA_FLAG_SRQ)) + return 0; + + spin_lock_init(&dev->srq_table.lock); + + err = mthca_alloc_init(&dev->srq_table.alloc, + dev->limits.num_srqs, + dev->limits.num_srqs - 1, + dev->limits.reserved_srqs); + if (err) + return err; + + err = mthca_array_init(&dev->srq_table.srq, + dev->limits.num_srqs); + if (err) + mthca_alloc_cleanup(&dev->srq_table.alloc); + + return err; +} + +void mthca_cleanup_srq_table(struct mthca_dev *dev) +{ + if (!(dev->mthca_flags & MTHCA_FLAG_SRQ)) + return; + + mthca_array_cleanup(&dev->srq_table.srq, dev->limits.num_srqs); + mthca_alloc_cleanup(&dev->srq_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c new file mode 100644 index 00000000..ca5900c9 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_uar.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include /* PAGE_SHIFT */ + +#include "mthca_dev.h" +#include "mthca_memfree.h" + +int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar) +{ + uar->index = mthca_alloc(&dev->uar_table.alloc); + if (uar->index == -1) + return -ENOMEM; + + uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index; + + return 0; +} + +void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar) +{ + mthca_free(&dev->uar_table.alloc, uar->index); +} + +int mthca_init_uar_table(struct mthca_dev *dev) +{ + int ret; + + ret = mthca_alloc_init(&dev->uar_table.alloc, + dev->limits.num_uars, + dev->limits.num_uars - 1, + dev->limits.reserved_uars + 1); + if (ret) + return ret; + + ret = mthca_init_db_tab(dev); + if (ret) + mthca_alloc_cleanup(&dev->uar_table.alloc); + + return ret; +} + +void mthca_cleanup_uar_table(struct mthca_dev *dev) +{ + mthca_cleanup_db_tab(dev); + + /* XXX check if any UARs are still allocated? */ + mthca_alloc_cleanup(&dev->uar_table.alloc); +} diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h new file mode 100644 index 00000000..5fe56e81 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_user.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MTHCA_USER_H +#define MTHCA_USER_H + +#include + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define MTHCA_UVERBS_ABI_VERSION 1 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct mthca_alloc_ucontext_resp { + __u32 qp_tab_size; + __u32 uarc_size; +}; + +struct mthca_alloc_pd_resp { + __u32 pdn; + __u32 reserved; +}; + +struct mthca_reg_mr { +/* + * Mark the memory region with a DMA attribute that causes + * in-flight DMA to be flushed when the region is written to: + */ +#define MTHCA_MR_DMASYNC 0x1 + __u32 mr_attrs; + __u32 reserved; +}; + +struct mthca_create_cq { + __u32 lkey; + __u32 pdn; + __u64 arm_db_page; + __u64 set_db_page; + __u32 arm_db_index; + __u32 set_db_index; +}; + +struct mthca_create_cq_resp { + __u32 cqn; + __u32 reserved; +}; + +struct mthca_resize_cq { + __u32 lkey; + __u32 reserved; +}; + +struct mthca_create_srq { + __u32 lkey; + __u32 db_index; + __u64 db_page; +}; + +struct mthca_create_srq_resp { + __u32 srqn; + __u32 reserved; +}; + +struct mthca_create_qp { + __u32 lkey; + __u32 reserved; + __u64 sq_db_page; + __u64 rq_db_page; + __u32 sq_db_index; + __u32 rq_db_index; +}; + +#endif /* MTHCA_USER_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h new file mode 100644 index 00000000..341a5ae8 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_wqe.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MTHCA_WQE_H +#define MTHCA_WQE_H + +#include + +enum { + MTHCA_NEXT_DBD = 1 << 7, + MTHCA_NEXT_FENCE = 1 << 6, + MTHCA_NEXT_CQ_UPDATE = 1 << 3, + MTHCA_NEXT_EVENT_GEN = 1 << 2, + MTHCA_NEXT_SOLICIT = 1 << 1, + MTHCA_NEXT_IP_CSUM = 1 << 4, + MTHCA_NEXT_TCP_UDP_CSUM = 1 << 5, + + MTHCA_MLX_VL15 = 1 << 17, + MTHCA_MLX_SLR = 1 << 16 +}; + +enum { + MTHCA_INVAL_LKEY = 0x100, + MTHCA_TAVOR_MAX_WQES_PER_RECV_DB = 256, + MTHCA_ARBEL_MAX_WQES_PER_SEND_DB = 255 +}; + +struct mthca_next_seg { + __be32 nda_op; /* [31:6] next WQE [4:0] next opcode */ + __be32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */ + __be32 flags; /* [3] CQ [2] Event [1] Solicit */ + __be32 imm; /* immediate data */ +}; + +struct mthca_tavor_ud_seg { + u32 reserved1; + __be32 lkey; + __be64 av_addr; + u32 reserved2[4]; + __be32 dqpn; + __be32 qkey; + u32 reserved3[2]; +}; + +struct mthca_arbel_ud_seg { + __be32 av[8]; + __be32 dqpn; + __be32 qkey; + u32 reserved[2]; +}; + +struct mthca_bind_seg { + __be32 flags; /* [31] Atomic [30] rem write [29] rem read */ + u32 reserved; + __be32 new_rkey; + __be32 lkey; + __be64 addr; + __be64 length; +}; + +struct mthca_raddr_seg { + __be64 raddr; + __be32 rkey; + u32 reserved; +}; + +struct mthca_atomic_seg { + __be64 swap_add; + __be64 compare; +}; + +struct mthca_data_seg { + __be32 byte_count; + __be32 lkey; + __be64 addr; +}; + +struct mthca_mlx_seg { + __be32 nda_op; + __be32 nds; + __be32 flags; /* [17] VL15 [16] SLR [14:12] static rate + [11:8] SL [3] C [2] E */ + __be16 rlid; + __be16 vcrc; +}; + +static __always_inline void mthca_set_data_seg(struct mthca_data_seg *dseg, + struct ib_sge *sg) +{ + dseg->byte_count = cpu_to_be32(sg->length); + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->addr); +} + +static __always_inline void mthca_set_data_seg_inval(struct mthca_data_seg *dseg) +{ + dseg->byte_count = 0; + dseg->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + dseg->addr = 0; +} + +#endif /* MTHCA_WQE_H */ diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig new file mode 100644 index 00000000..846dc97c --- /dev/null +++ b/drivers/infiniband/hw/nes/Kconfig @@ -0,0 +1,16 @@ +config INFINIBAND_NES + tristate "NetEffect RNIC Driver" + depends on PCI && INET && INFINIBAND + select LIBCRC32C + select INET_LRO + ---help--- + This is the RDMA Network Interface Card (RNIC) driver for + NetEffect Ethernet Cluster Server Adapters. + +config INFINIBAND_NES_DEBUG + bool "Verbose debugging output" + depends on INFINIBAND_NES + default n + ---help--- + This option enables debug messages from the NetEffect RNIC + driver. Select this if you are diagnosing a problem. diff --git a/drivers/infiniband/hw/nes/Makefile b/drivers/infiniband/hw/nes/Makefile new file mode 100644 index 00000000..97820c23 --- /dev/null +++ b/drivers/infiniband/hw/nes/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_INFINIBAND_NES) += iw_nes.o + +iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o nes_mgt.o diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c new file mode 100644 index 00000000..7140199f --- /dev/null +++ b/drivers/infiniband/hw/nes/nes.c @@ -0,0 +1,1246 @@ +/* + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nes.h" + +#include +#include +#include +#include + +MODULE_AUTHOR("NetEffect"); +MODULE_DESCRIPTION("NetEffect RNIC Low-level iWARP Driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); + +int max_mtu = 9000; +int interrupt_mod_interval = 0; + + +/* Interoperability */ +int mpa_version = 1; +module_param(mpa_version, int, 0644); +MODULE_PARM_DESC(mpa_version, "MPA version to be used int MPA Req/Resp (0 or 1)"); + +/* Interoperability */ +int disable_mpa_crc = 0; +module_param(disable_mpa_crc, int, 0644); +MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC"); + +unsigned int send_first = 0; +module_param(send_first, int, 0644); +MODULE_PARM_DESC(send_first, "Send RDMA Message First on Active Connection"); + + +unsigned int nes_drv_opt = NES_DRV_OPT_DISABLE_INT_MOD | NES_DRV_OPT_ENABLE_PAU; +module_param(nes_drv_opt, int, 0644); +MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters"); + +unsigned int nes_debug_level = 0; +module_param_named(debug_level, nes_debug_level, uint, 0644); +MODULE_PARM_DESC(debug_level, "Enable debug output level"); + +unsigned int wqm_quanta = 0x10000; +module_param(wqm_quanta, int, 0644); +MODULE_PARM_DESC(wqm_quanta, "WQM quanta"); + +static bool limit_maxrdreqsz; +module_param(limit_maxrdreqsz, bool, 0644); +MODULE_PARM_DESC(limit_maxrdreqsz, "Limit max read request size to 256 Bytes"); + +LIST_HEAD(nes_adapter_list); +static LIST_HEAD(nes_dev_list); + +atomic_t qps_destroyed; + +static unsigned int ee_flsh_adapter; +static unsigned int sysfs_nonidx_addr; +static unsigned int sysfs_idx_addr; + +static struct pci_device_id nes_pci_table[] = { + { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020), }, + { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020_KR), }, + {0} +}; + +MODULE_DEVICE_TABLE(pci, nes_pci_table); + +static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *); +static int nes_net_event(struct notifier_block *, unsigned long, void *); +static int nes_notifiers_registered; + + +static struct notifier_block nes_inetaddr_notifier = { + .notifier_call = nes_inetaddr_event +}; + +static struct notifier_block nes_net_notifier = { + .notifier_call = nes_net_event +}; + +/** + * nes_inetaddr_event + */ +static int nes_inetaddr_event(struct notifier_block *notifier, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = ptr; + struct net_device *event_netdev = ifa->ifa_dev->dev; + struct nes_device *nesdev; + struct net_device *netdev; + struct nes_vnic *nesvnic; + unsigned int is_bonded; + + nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address %pI4, netmask %pI4.\n", + &ifa->ifa_address, &ifa->ifa_mask); + list_for_each_entry(nesdev, &nes_dev_list, list) { + nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p. (%s)\n", + nesdev, nesdev->netdev[0]->name); + netdev = nesdev->netdev[0]; + nesvnic = netdev_priv(netdev); + is_bonded = netif_is_bond_slave(netdev) && + (netdev->master == event_netdev); + if ((netdev == event_netdev) || is_bonded) { + if (nesvnic->rdma_enabled == 0) { + nes_debug(NES_DBG_NETDEV, "Returning without processing event for %s since" + " RDMA is not enabled.\n", + netdev->name); + return NOTIFY_OK; + } + /* we have ifa->ifa_address/mask here if we need it */ + switch (event) { + case NETDEV_DOWN: + nes_debug(NES_DBG_NETDEV, "event:DOWN\n"); + nes_write_indexed(nesdev, + NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), 0); + + nes_manage_arp_cache(netdev, netdev->dev_addr, + ntohl(nesvnic->local_ipaddr), NES_ARP_DELETE); + nesvnic->local_ipaddr = 0; + if (is_bonded) + continue; + else + return NOTIFY_OK; + break; + case NETDEV_UP: + nes_debug(NES_DBG_NETDEV, "event:UP\n"); + + if (nesvnic->local_ipaddr != 0) { + nes_debug(NES_DBG_NETDEV, "Interface already has local_ipaddr\n"); + return NOTIFY_OK; + } + /* fall through */ + case NETDEV_CHANGEADDR: + /* Add the address to the IP table */ + if (netdev->master) + nesvnic->local_ipaddr = + ((struct in_device *)netdev->master->ip_ptr)->ifa_list->ifa_address; + else + nesvnic->local_ipaddr = ifa->ifa_address; + + nes_write_indexed(nesdev, + NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), + ntohl(nesvnic->local_ipaddr)); + nes_manage_arp_cache(netdev, netdev->dev_addr, + ntohl(nesvnic->local_ipaddr), NES_ARP_ADD); + if (is_bonded) + continue; + else + return NOTIFY_OK; + break; + default: + break; + } + } + } + + return NOTIFY_DONE; +} + + +/** + * nes_net_event + */ +static int nes_net_event(struct notifier_block *notifier, + unsigned long event, void *ptr) +{ + struct neighbour *neigh = ptr; + struct nes_device *nesdev; + struct net_device *netdev; + struct nes_vnic *nesvnic; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + list_for_each_entry(nesdev, &nes_dev_list, list) { + /* nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p.\n", nesdev); */ + netdev = nesdev->netdev[0]; + nesvnic = netdev_priv(netdev); + if (netdev == neigh->dev) { + if (nesvnic->rdma_enabled == 0) { + nes_debug(NES_DBG_NETDEV, "Skipping device %s since no RDMA\n", + netdev->name); + } else { + if (neigh->nud_state & NUD_VALID) { + nes_manage_arp_cache(neigh->dev, neigh->ha, + ntohl(*(__be32 *)neigh->primary_key), NES_ARP_ADD); + } else { + nes_manage_arp_cache(neigh->dev, neigh->ha, + ntohl(*(__be32 *)neigh->primary_key), NES_ARP_DELETE); + } + } + return NOTIFY_OK; + } + } + break; + default: + nes_debug(NES_DBG_NETDEV, "NETEVENT_ %lu undefined\n", event); + break; + } + + return NOTIFY_DONE; +} + + +/** + * nes_add_ref + */ +void nes_add_ref(struct ib_qp *ibqp) +{ + struct nes_qp *nesqp; + + nesqp = to_nesqp(ibqp); + nes_debug(NES_DBG_QP, "Bumping refcount for QP%u. Pre-inc value = %u\n", + ibqp->qp_num, atomic_read(&nesqp->refcount)); + atomic_inc(&nesqp->refcount); +} + +static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request) +{ + unsigned long flags; + struct nes_qp *nesqp = cqp_request->cqp_callback_pointer; + struct nes_adapter *nesadapter = nesdev->nesadapter; + + atomic_inc(&qps_destroyed); + + /* Free the control structures */ + + if (nesqp->pbl_vbase) { + pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, + nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase); + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_256pbl++; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase); + nesqp->pbl_vbase = NULL; + + } else { + pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, + nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase); + } + nes_free_resource(nesadapter, nesadapter->allocated_qps, nesqp->hwqp.qp_id); + + nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL; + kfree(nesqp->allocated_buffer); + +} + +/** + * nes_rem_ref + */ +void nes_rem_ref(struct ib_qp *ibqp) +{ + u64 u64temp; + struct nes_qp *nesqp; + struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + u32 opcode; + + nesqp = to_nesqp(ibqp); + + if (atomic_read(&nesqp->refcount) == 0) { + printk(KERN_INFO PFX "%s: Reference count already 0 for QP%d, last aeq = 0x%04X.\n", + __func__, ibqp->qp_num, nesqp->last_aeq); + BUG(); + } + + if (atomic_dec_and_test(&nesqp->refcount)) { + if (nesqp->pau_mode) + nes_destroy_pau_qp(nesdev, nesqp); + + /* Destroy the QP */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n"); + return; + } + cqp_request->waiting = 0; + cqp_request->callback = 1; + cqp_request->cqp_callback = nes_cqp_rem_ref_callback; + cqp_request->cqp_callback_pointer = nesqp; + cqp_wqe = &cqp_request->cqp_wqe; + + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + opcode = NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_IWARP; + + if (nesqp->hte_added) { + opcode |= NES_CQP_QP_DEL_HTE; + nesqp->hte_added = 0; + } + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); + u64temp = (u64)nesqp->nesqp_context_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); + nes_post_cqp_request(nesdev, cqp_request); + } +} + + +/** + * nes_get_qp + */ +struct ib_qp *nes_get_qp(struct ib_device *device, int qpn) +{ + struct nes_vnic *nesvnic = to_nesvnic(device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + + if ((qpn < NES_FIRST_QPN) || (qpn >= (NES_FIRST_QPN + nesadapter->max_qp))) + return NULL; + + return &nesadapter->qp_table[qpn - NES_FIRST_QPN]->ibqp; +} + + +/** + * nes_print_macaddr + */ +static void nes_print_macaddr(struct net_device *netdev) +{ + nes_debug(NES_DBG_INIT, "%s: %pM, IRQ %u\n", + netdev->name, netdev->dev_addr, netdev->irq); +} + +/** + * nes_interrupt - handle interrupts + */ +static irqreturn_t nes_interrupt(int irq, void *dev_id) +{ + struct nes_device *nesdev = (struct nes_device *)dev_id; + int handled = 0; + u32 int_mask; + u32 int_req; + u32 int_stat; + u32 intf_int_stat; + u32 timer_stat; + + if (nesdev->msi_enabled) { + /* No need to read the interrupt pending register if msi is enabled */ + handled = 1; + } else { + if (unlikely(nesdev->nesadapter->hw_rev == NE020_REV)) { + /* Master interrupt enable provides synchronization for kicking off bottom half + when interrupt sharing is going on */ + int_mask = nes_read32(nesdev->regs + NES_INT_MASK); + if (int_mask & 0x80000000) { + /* Check interrupt status to see if this might be ours */ + int_stat = nes_read32(nesdev->regs + NES_INT_STAT); + int_req = nesdev->int_req; + if (int_stat&int_req) { + /* if interesting CEQ or AEQ is pending, claim the interrupt */ + if ((int_stat&int_req) & (~(NES_INT_TIMER|NES_INT_INTF))) { + handled = 1; + } else { + if (((int_stat & int_req) & NES_INT_TIMER) == NES_INT_TIMER) { + /* Timer might be running but might be for another function */ + timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT); + if ((timer_stat & nesdev->timer_int_req) != 0) { + handled = 1; + } + } + if ((((int_stat & int_req) & NES_INT_INTF) == NES_INT_INTF) && + (handled == 0)) { + intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT); + if ((intf_int_stat & nesdev->intf_int_req) != 0) { + handled = 1; + } + } + } + if (handled) { + nes_write32(nesdev->regs+NES_INT_MASK, int_mask & (~0x80000000)); + int_mask = nes_read32(nesdev->regs+NES_INT_MASK); + /* Save off the status to save an additional read */ + nesdev->int_stat = int_stat; + nesdev->napi_isr_ran = 1; + } + } + } + } else { + handled = nes_read32(nesdev->regs+NES_INT_PENDING); + } + } + + if (handled) { + + if (nes_napi_isr(nesdev) == 0) { + tasklet_schedule(&nesdev->dpc_tasklet); + + } + return IRQ_HANDLED; + } else { + return IRQ_NONE; + } +} + + +/** + * nes_probe - Device initialization + */ +static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) +{ + struct net_device *netdev = NULL; + struct nes_device *nesdev = NULL; + int ret = 0; + void __iomem *mmio_regs = NULL; + u8 hw_rev; + + assert(pcidev != NULL); + assert(ent != NULL); + + printk(KERN_INFO PFX "NetEffect RNIC driver v%s loading. (%s)\n", + DRV_VERSION, pci_name(pcidev)); + + ret = pci_enable_device(pcidev); + if (ret) { + printk(KERN_ERR PFX "Unable to enable PCI device. (%s)\n", pci_name(pcidev)); + goto bail0; + } + + nes_debug(NES_DBG_INIT, "BAR0 (@0x%08lX) size = 0x%lX bytes\n", + (long unsigned int)pci_resource_start(pcidev, BAR_0), + (long unsigned int)pci_resource_len(pcidev, BAR_0)); + nes_debug(NES_DBG_INIT, "BAR1 (@0x%08lX) size = 0x%lX bytes\n", + (long unsigned int)pci_resource_start(pcidev, BAR_1), + (long unsigned int)pci_resource_len(pcidev, BAR_1)); + + /* Make sure PCI base addr are MMIO */ + if (!(pci_resource_flags(pcidev, BAR_0) & IORESOURCE_MEM) || + !(pci_resource_flags(pcidev, BAR_1) & IORESOURCE_MEM)) { + printk(KERN_ERR PFX "PCI regions not an MMIO resource\n"); + ret = -ENODEV; + goto bail1; + } + + /* Reserve PCI I/O and memory resources */ + ret = pci_request_regions(pcidev, DRV_NAME); + if (ret) { + printk(KERN_ERR PFX "Unable to request regions. (%s)\n", pci_name(pcidev)); + goto bail1; + } + + if ((sizeof(dma_addr_t) > 4)) { + ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(64)); + if (ret < 0) { + printk(KERN_ERR PFX "64b DMA mask configuration failed\n"); + goto bail2; + } + ret = pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(64)); + if (ret) { + printk(KERN_ERR PFX "64b DMA consistent mask configuration failed\n"); + goto bail2; + } + } else { + ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32)); + if (ret < 0) { + printk(KERN_ERR PFX "32b DMA mask configuration failed\n"); + goto bail2; + } + ret = pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(32)); + if (ret) { + printk(KERN_ERR PFX "32b DMA consistent mask configuration failed\n"); + goto bail2; + } + } + + pci_set_master(pcidev); + + /* Allocate hardware structure */ + nesdev = kzalloc(sizeof(struct nes_device), GFP_KERNEL); + if (!nesdev) { + printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n", pci_name(pcidev)); + ret = -ENOMEM; + goto bail2; + } + + nes_debug(NES_DBG_INIT, "Allocated nes device at %p\n", nesdev); + nesdev->pcidev = pcidev; + pci_set_drvdata(pcidev, nesdev); + + pci_read_config_byte(pcidev, 0x0008, &hw_rev); + nes_debug(NES_DBG_INIT, "hw_rev=%u\n", hw_rev); + + spin_lock_init(&nesdev->indexed_regs_lock); + + /* Remap the PCI registers in adapter BAR0 to kernel VA space */ + mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0), + pci_resource_len(pcidev, BAR_0)); + if (mmio_regs == NULL) { + printk(KERN_ERR PFX "Unable to remap BAR0\n"); + ret = -EIO; + goto bail3; + } + nesdev->regs = mmio_regs; + nesdev->index_reg = 0x50 + (PCI_FUNC(pcidev->devfn)*8) + mmio_regs; + + /* Ensure interrupts are disabled */ + nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff); + + if (nes_drv_opt & NES_DRV_OPT_ENABLE_MSI) { + if (!pci_enable_msi(nesdev->pcidev)) { + nesdev->msi_enabled = 1; + nes_debug(NES_DBG_INIT, "MSI is enabled for device %s\n", + pci_name(pcidev)); + } else { + nes_debug(NES_DBG_INIT, "MSI is disabled by linux for device %s\n", + pci_name(pcidev)); + } + } else { + nes_debug(NES_DBG_INIT, "MSI not requested due to driver options for device %s\n", + pci_name(pcidev)); + } + + nesdev->csr_start = pci_resource_start(nesdev->pcidev, BAR_0); + nesdev->doorbell_region = pci_resource_start(nesdev->pcidev, BAR_1); + + /* Init the adapter */ + nesdev->nesadapter = nes_init_adapter(nesdev, hw_rev); + if (!nesdev->nesadapter) { + printk(KERN_ERR PFX "Unable to initialize adapter.\n"); + ret = -ENOMEM; + goto bail5; + } + nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; + nesdev->nesadapter->wqm_quanta = wqm_quanta; + + /* nesdev->base_doorbell_index = + nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */ + nesdev->base_doorbell_index = 1; + nesdev->doorbell_start = nesdev->nesadapter->doorbell_start; + if (nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { + switch (PCI_FUNC(nesdev->pcidev->devfn) % + nesdev->nesadapter->port_count) { + case 1: + nesdev->mac_index = 2; + break; + case 2: + nesdev->mac_index = 1; + break; + case 3: + nesdev->mac_index = 3; + break; + case 0: + default: + nesdev->mac_index = 0; + } + } else { + nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) % + nesdev->nesadapter->port_count; + } + + if ((limit_maxrdreqsz || + ((nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_GLADIUS) && + (hw_rev == NE020_REV1))) && + (pcie_get_readrq(pcidev) > 256)) { + if (pcie_set_readrq(pcidev, 256)) + printk(KERN_ERR PFX "Unable to set max read request" + " to 256 bytes\n"); + else + nes_debug(NES_DBG_INIT, "Max read request size set" + " to 256 bytes\n"); + } + + tasklet_init(&nesdev->dpc_tasklet, nes_dpc, (unsigned long)nesdev); + + /* bring up the Control QP */ + if (nes_init_cqp(nesdev)) { + ret = -ENODEV; + goto bail6; + } + + /* Arm the CCQ */ + nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | + PCI_FUNC(nesdev->pcidev->devfn)); + nes_read32(nesdev->regs+NES_CQE_ALLOC); + + /* Enable the interrupts */ + nesdev->int_req = (0x101 << PCI_FUNC(nesdev->pcidev->devfn)) | + (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16)); + if (PCI_FUNC(nesdev->pcidev->devfn) < 4) { + nesdev->int_req |= (1 << (PCI_FUNC(nesdev->mac_index)+24)); + } + + /* TODO: This really should be the first driver to load, not function 0 */ + if (PCI_FUNC(nesdev->pcidev->devfn) == 0) { + /* pick up PCI and critical errors if the first driver to load */ + nesdev->intf_int_req = NES_INTF_INT_PCIERR | NES_INTF_INT_CRITERR; + nesdev->int_req |= NES_INT_INTF; + } else { + nesdev->intf_int_req = 0; + } + nesdev->intf_int_req |= (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16)); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0, 0); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 0); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS2, 0x00001265); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS4, 0x18021804); + + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS3, 0x17801790); + + /* deal with both periodic and one_shot */ + nesdev->timer_int_req = 0x101 << PCI_FUNC(nesdev->pcidev->devfn); + nesdev->nesadapter->timer_int_req |= nesdev->timer_int_req; + nes_debug(NES_DBG_INIT, "setting int_req for function %u, nesdev = 0x%04X, adapter = 0x%04X\n", + PCI_FUNC(nesdev->pcidev->devfn), + nesdev->timer_int_req, nesdev->nesadapter->timer_int_req); + + nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); + + list_add_tail(&nesdev->list, &nes_dev_list); + + /* Request an interrupt line for the driver */ + ret = request_irq(pcidev->irq, nes_interrupt, IRQF_SHARED, DRV_NAME, nesdev); + if (ret) { + printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n", + pci_name(pcidev), pcidev->irq); + goto bail65; + } + + nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); + + if (nes_notifiers_registered == 0) { + register_inetaddr_notifier(&nes_inetaddr_notifier); + register_netevent_notifier(&nes_net_notifier); + } + nes_notifiers_registered++; + + INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status); + + /* Initialize network devices */ + if ((netdev = nes_netdev_init(nesdev, mmio_regs)) == NULL) + goto bail7; + + /* Register network device */ + ret = register_netdev(netdev); + if (ret) { + printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", ret); + nes_netdev_destroy(netdev); + goto bail7; + } + + nes_print_macaddr(netdev); + + nesdev->netdev_count++; + nesdev->nesadapter->netdev_count++; + + printk(KERN_INFO PFX "%s: NetEffect RNIC driver successfully loaded.\n", + pci_name(pcidev)); + return 0; + + bail7: + printk(KERN_ERR PFX "bail7\n"); + while (nesdev->netdev_count > 0) { + nesdev->netdev_count--; + nesdev->nesadapter->netdev_count--; + + unregister_netdev(nesdev->netdev[nesdev->netdev_count]); + nes_netdev_destroy(nesdev->netdev[nesdev->netdev_count]); + } + + nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n", + nesdev->netdev_count, nesdev->nesadapter->netdev_count); + + nes_notifiers_registered--; + if (nes_notifiers_registered == 0) { + unregister_netevent_notifier(&nes_net_notifier); + unregister_inetaddr_notifier(&nes_inetaddr_notifier); + } + + list_del(&nesdev->list); + nes_destroy_cqp(nesdev); + + bail65: + printk(KERN_ERR PFX "bail65\n"); + free_irq(pcidev->irq, nesdev); + if (nesdev->msi_enabled) { + pci_disable_msi(pcidev); + } + bail6: + printk(KERN_ERR PFX "bail6\n"); + tasklet_kill(&nesdev->dpc_tasklet); + /* Deallocate the Adapter Structure */ + nes_destroy_adapter(nesdev->nesadapter); + + bail5: + printk(KERN_ERR PFX "bail5\n"); + iounmap(nesdev->regs); + + bail3: + printk(KERN_ERR PFX "bail3\n"); + kfree(nesdev); + + bail2: + pci_release_regions(pcidev); + + bail1: + pci_disable_device(pcidev); + + bail0: + return ret; +} + + +/** + * nes_remove - unload from kernel + */ +static void __devexit nes_remove(struct pci_dev *pcidev) +{ + struct nes_device *nesdev = pci_get_drvdata(pcidev); + struct net_device *netdev; + int netdev_index = 0; + unsigned long flags; + + if (nesdev->netdev_count) { + netdev = nesdev->netdev[netdev_index]; + if (netdev) { + netif_stop_queue(netdev); + unregister_netdev(netdev); + nes_netdev_destroy(netdev); + + nesdev->netdev[netdev_index] = NULL; + nesdev->netdev_count--; + nesdev->nesadapter->netdev_count--; + } + } + + nes_notifiers_registered--; + if (nes_notifiers_registered == 0) { + unregister_netevent_notifier(&nes_net_notifier); + unregister_inetaddr_notifier(&nes_inetaddr_notifier); + } + + list_del(&nesdev->list); + nes_destroy_cqp(nesdev); + + free_irq(pcidev->irq, nesdev); + tasklet_kill(&nesdev->dpc_tasklet); + + spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); + if (nesdev->link_recheck) { + spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); + cancel_delayed_work_sync(&nesdev->work); + } else { + spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); + } + + /* Deallocate the Adapter Structure */ + nes_destroy_adapter(nesdev->nesadapter); + + if (nesdev->msi_enabled) { + pci_disable_msi(pcidev); + } + + iounmap(nesdev->regs); + kfree(nesdev); + + /* nes_debug(NES_DBG_SHUTDOWN, "calling pci_release_regions.\n"); */ + pci_release_regions(pcidev); + pci_disable_device(pcidev); + pci_set_drvdata(pcidev, NULL); +} + + +static struct pci_driver nes_pci_driver = { + .name = DRV_NAME, + .id_table = nes_pci_table, + .probe = nes_probe, + .remove = __devexit_p(nes_remove), +}; + +static ssize_t nes_show_adapter(struct device_driver *ddp, char *buf) +{ + unsigned int devfn = 0xffffffff; + unsigned char bus_number = 0xff; + unsigned int i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + devfn = nesdev->pcidev->devfn; + bus_number = nesdev->pcidev->bus->number; + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "%x:%x\n", bus_number, devfn); +} + +static ssize_t nes_store_adapter(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + + ee_flsh_adapter = simple_strtoul(p, &p, 10); + return strnlen(buf, count); +} + +static ssize_t nes_show_ee_cmd(struct device_driver *ddp, char *buf) +{ + u32 eeprom_cmd = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + eeprom_cmd = nes_read32(nesdev->regs + NES_EEPROM_COMMAND); + break; + } + i++; + } + return snprintf(buf, PAGE_SIZE, "0x%x\n", eeprom_cmd); +} + +static ssize_t nes_store_ee_cmd(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + u32 val; + u32 i = 0; + struct nes_device *nesdev; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { + val = simple_strtoul(p, &p, 16); + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nes_write32(nesdev->regs + NES_EEPROM_COMMAND, val); + break; + } + i++; + } + } + return strnlen(buf, count); +} + +static ssize_t nes_show_ee_data(struct device_driver *ddp, char *buf) +{ + u32 eeprom_data = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + eeprom_data = nes_read32(nesdev->regs + NES_EEPROM_DATA); + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "0x%x\n", eeprom_data); +} + +static ssize_t nes_store_ee_data(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + u32 val; + u32 i = 0; + struct nes_device *nesdev; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { + val = simple_strtoul(p, &p, 16); + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nes_write32(nesdev->regs + NES_EEPROM_DATA, val); + break; + } + i++; + } + } + return strnlen(buf, count); +} + +static ssize_t nes_show_flash_cmd(struct device_driver *ddp, char *buf) +{ + u32 flash_cmd = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + flash_cmd = nes_read32(nesdev->regs + NES_FLASH_COMMAND); + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "0x%x\n", flash_cmd); +} + +static ssize_t nes_store_flash_cmd(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + u32 val; + u32 i = 0; + struct nes_device *nesdev; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { + val = simple_strtoul(p, &p, 16); + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nes_write32(nesdev->regs + NES_FLASH_COMMAND, val); + break; + } + i++; + } + } + return strnlen(buf, count); +} + +static ssize_t nes_show_flash_data(struct device_driver *ddp, char *buf) +{ + u32 flash_data = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + flash_data = nes_read32(nesdev->regs + NES_FLASH_DATA); + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "0x%x\n", flash_data); +} + +static ssize_t nes_store_flash_data(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + u32 val; + u32 i = 0; + struct nes_device *nesdev; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { + val = simple_strtoul(p, &p, 16); + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nes_write32(nesdev->regs + NES_FLASH_DATA, val); + break; + } + i++; + } + } + return strnlen(buf, count); +} + +static ssize_t nes_show_nonidx_addr(struct device_driver *ddp, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_nonidx_addr); +} + +static ssize_t nes_store_nonidx_addr(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') + sysfs_nonidx_addr = simple_strtoul(p, &p, 16); + + return strnlen(buf, count); +} + +static ssize_t nes_show_nonidx_data(struct device_driver *ddp, char *buf) +{ + u32 nonidx_data = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nonidx_data = nes_read32(nesdev->regs + sysfs_nonidx_addr); + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "0x%x\n", nonidx_data); +} + +static ssize_t nes_store_nonidx_data(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + u32 val; + u32 i = 0; + struct nes_device *nesdev; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { + val = simple_strtoul(p, &p, 16); + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nes_write32(nesdev->regs + sysfs_nonidx_addr, val); + break; + } + i++; + } + } + return strnlen(buf, count); +} + +static ssize_t nes_show_idx_addr(struct device_driver *ddp, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_idx_addr); +} + +static ssize_t nes_store_idx_addr(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') + sysfs_idx_addr = simple_strtoul(p, &p, 16); + + return strnlen(buf, count); +} + +static ssize_t nes_show_idx_data(struct device_driver *ddp, char *buf) +{ + u32 idx_data = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + idx_data = nes_read_indexed(nesdev, sysfs_idx_addr); + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "0x%x\n", idx_data); +} + +static ssize_t nes_store_idx_data(struct device_driver *ddp, + const char *buf, size_t count) +{ + char *p = (char *)buf; + u32 val; + u32 i = 0; + struct nes_device *nesdev; + + if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { + val = simple_strtoul(p, &p, 16); + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nes_write_indexed(nesdev, sysfs_idx_addr, val); + break; + } + i++; + } + } + return strnlen(buf, count); +} + + +/** + * nes_show_wqm_quanta + */ +static ssize_t nes_show_wqm_quanta(struct device_driver *ddp, char *buf) +{ + u32 wqm_quanta_value = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + wqm_quanta_value = nesdev->nesadapter->wqm_quanta; + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "0x%X\n", wqm_quanta_value); +} + + +/** + * nes_store_wqm_quanta + */ +static ssize_t nes_store_wqm_quanta(struct device_driver *ddp, + const char *buf, size_t count) +{ + unsigned long wqm_quanta_value; + u32 wqm_config1; + u32 i = 0; + struct nes_device *nesdev; + + if (kstrtoul(buf, 0, &wqm_quanta_value) < 0) + return -EINVAL; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nesdev->nesadapter->wqm_quanta = wqm_quanta_value; + wqm_config1 = nes_read_indexed(nesdev, + NES_IDX_WQM_CONFIG1); + nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1, + ((wqm_quanta_value << 1) | + (wqm_config1 & 0x00000001))); + break; + } + i++; + } + return strnlen(buf, count); +} + +static DRIVER_ATTR(adapter, S_IRUSR | S_IWUSR, + nes_show_adapter, nes_store_adapter); +static DRIVER_ATTR(eeprom_cmd, S_IRUSR | S_IWUSR, + nes_show_ee_cmd, nes_store_ee_cmd); +static DRIVER_ATTR(eeprom_data, S_IRUSR | S_IWUSR, + nes_show_ee_data, nes_store_ee_data); +static DRIVER_ATTR(flash_cmd, S_IRUSR | S_IWUSR, + nes_show_flash_cmd, nes_store_flash_cmd); +static DRIVER_ATTR(flash_data, S_IRUSR | S_IWUSR, + nes_show_flash_data, nes_store_flash_data); +static DRIVER_ATTR(nonidx_addr, S_IRUSR | S_IWUSR, + nes_show_nonidx_addr, nes_store_nonidx_addr); +static DRIVER_ATTR(nonidx_data, S_IRUSR | S_IWUSR, + nes_show_nonidx_data, nes_store_nonidx_data); +static DRIVER_ATTR(idx_addr, S_IRUSR | S_IWUSR, + nes_show_idx_addr, nes_store_idx_addr); +static DRIVER_ATTR(idx_data, S_IRUSR | S_IWUSR, + nes_show_idx_data, nes_store_idx_data); +static DRIVER_ATTR(wqm_quanta, S_IRUSR | S_IWUSR, + nes_show_wqm_quanta, nes_store_wqm_quanta); + +static int nes_create_driver_sysfs(struct pci_driver *drv) +{ + int error; + error = driver_create_file(&drv->driver, &driver_attr_adapter); + error |= driver_create_file(&drv->driver, &driver_attr_eeprom_cmd); + error |= driver_create_file(&drv->driver, &driver_attr_eeprom_data); + error |= driver_create_file(&drv->driver, &driver_attr_flash_cmd); + error |= driver_create_file(&drv->driver, &driver_attr_flash_data); + error |= driver_create_file(&drv->driver, &driver_attr_nonidx_addr); + error |= driver_create_file(&drv->driver, &driver_attr_nonidx_data); + error |= driver_create_file(&drv->driver, &driver_attr_idx_addr); + error |= driver_create_file(&drv->driver, &driver_attr_idx_data); + error |= driver_create_file(&drv->driver, &driver_attr_wqm_quanta); + return error; +} + +static void nes_remove_driver_sysfs(struct pci_driver *drv) +{ + driver_remove_file(&drv->driver, &driver_attr_adapter); + driver_remove_file(&drv->driver, &driver_attr_eeprom_cmd); + driver_remove_file(&drv->driver, &driver_attr_eeprom_data); + driver_remove_file(&drv->driver, &driver_attr_flash_cmd); + driver_remove_file(&drv->driver, &driver_attr_flash_data); + driver_remove_file(&drv->driver, &driver_attr_nonidx_addr); + driver_remove_file(&drv->driver, &driver_attr_nonidx_data); + driver_remove_file(&drv->driver, &driver_attr_idx_addr); + driver_remove_file(&drv->driver, &driver_attr_idx_data); + driver_remove_file(&drv->driver, &driver_attr_wqm_quanta); +} + +/** + * nes_init_module - module initialization entry point + */ +static int __init nes_init_module(void) +{ + int retval; + int retval1; + + retval = nes_cm_start(); + if (retval) { + printk(KERN_ERR PFX "Unable to start NetEffect iWARP CM.\n"); + return retval; + } + retval = pci_register_driver(&nes_pci_driver); + if (retval >= 0) { + retval1 = nes_create_driver_sysfs(&nes_pci_driver); + if (retval1 < 0) + printk(KERN_ERR PFX "Unable to create NetEffect sys files.\n"); + } + return retval; +} + + +/** + * nes_exit_module - module unload entry point + */ +static void __exit nes_exit_module(void) +{ + nes_cm_stop(); + nes_remove_driver_sysfs(&nes_pci_driver); + + pci_unregister_driver(&nes_pci_driver); +} + + +module_init(nes_init_module); +module_exit(nes_exit_module); diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h new file mode 100644 index 00000000..c438e469 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes.h @@ -0,0 +1,570 @@ +/* + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NES_H +#define __NES_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define NES_SEND_FIRST_WRITE + +#define QUEUE_DISCONNECTS + +#define DRV_NAME "iw_nes" +#define DRV_VERSION "1.5.0.0" +#define PFX DRV_NAME ": " + +/* + * NetEffect PCI vendor id and NE010 PCI device id. + */ +#ifndef PCI_VENDOR_ID_NETEFFECT /* not in pci.ids yet */ +#define PCI_VENDOR_ID_NETEFFECT 0x1678 +#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100 +#define PCI_DEVICE_ID_NETEFFECT_NE020_KR 0x0110 +#endif + +#define NE020_REV 4 +#define NE020_REV1 5 + +#define BAR_0 0 +#define BAR_1 2 + +#define RX_BUF_SIZE (1536 + 8) +#define NES_REG0_SIZE (4 * 1024) +#define NES_TX_TIMEOUT (6*HZ) +#define NES_FIRST_QPN 64 +#define NES_SW_CONTEXT_ALIGN 1024 + +#define NES_NIC_MAX_NICS 16 +#define NES_MAX_ARP_TABLE_SIZE 4096 + +#define NES_NIC_CEQ_SIZE 8 +/* NICs will be on a separate CQ */ +#define NES_CCEQ_SIZE ((nesadapter->max_cq / nesadapter->port_count) - 32) + +#define NES_MAX_PORT_COUNT 4 + +#define MAX_DPC_ITERATIONS 128 + +#define NES_DRV_OPT_ENABLE_MPA_VER_0 0x00000001 +#define NES_DRV_OPT_DISABLE_MPA_CRC 0x00000002 +#define NES_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004 +#define NES_DRV_OPT_DISABLE_INTF 0x00000008 +#define NES_DRV_OPT_ENABLE_MSI 0x00000010 +#define NES_DRV_OPT_DUAL_LOGICAL_PORT 0x00000020 +#define NES_DRV_OPT_SUPRESS_OPTION_BC 0x00000040 +#define NES_DRV_OPT_NO_INLINE_DATA 0x00000080 +#define NES_DRV_OPT_DISABLE_INT_MOD 0x00000100 +#define NES_DRV_OPT_DISABLE_VIRT_WQ 0x00000200 +#define NES_DRV_OPT_ENABLE_PAU 0x00000400 + +#define NES_AEQ_EVENT_TIMEOUT 2500 +#define NES_DISCONNECT_EVENT_TIMEOUT 2000 + +/* debug levels */ +/* must match userspace */ +#define NES_DBG_HW 0x00000001 +#define NES_DBG_INIT 0x00000002 +#define NES_DBG_ISR 0x00000004 +#define NES_DBG_PHY 0x00000008 +#define NES_DBG_NETDEV 0x00000010 +#define NES_DBG_CM 0x00000020 +#define NES_DBG_CM1 0x00000040 +#define NES_DBG_NIC_RX 0x00000080 +#define NES_DBG_NIC_TX 0x00000100 +#define NES_DBG_CQP 0x00000200 +#define NES_DBG_MMAP 0x00000400 +#define NES_DBG_MR 0x00000800 +#define NES_DBG_PD 0x00001000 +#define NES_DBG_CQ 0x00002000 +#define NES_DBG_QP 0x00004000 +#define NES_DBG_MOD_QP 0x00008000 +#define NES_DBG_AEQ 0x00010000 +#define NES_DBG_IW_RX 0x00020000 +#define NES_DBG_IW_TX 0x00040000 +#define NES_DBG_SHUTDOWN 0x00080000 +#define NES_DBG_PAU 0x00100000 +#define NES_DBG_RSVD1 0x10000000 +#define NES_DBG_RSVD2 0x20000000 +#define NES_DBG_RSVD3 0x40000000 +#define NES_DBG_RSVD4 0x80000000 +#define NES_DBG_ALL 0xffffffff + +#ifdef CONFIG_INFINIBAND_NES_DEBUG +#define nes_debug(level, fmt, args...) \ +do { \ + if (level & nes_debug_level) \ + printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \ +} while (0) + +#define assert(expr) \ +do { \ + if (!(expr)) { \ + printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \ + #expr, __FILE__, __func__, __LINE__); \ + } \ +} while (0) + +#define NES_EVENT_TIMEOUT 1200000 +#else +#define nes_debug(level, fmt, args...) +#define assert(expr) do {} while (0) + +#define NES_EVENT_TIMEOUT 100000 +#endif + +#include "nes_hw.h" +#include "nes_verbs.h" +#include "nes_context.h" +#include "nes_user.h" +#include "nes_cm.h" +#include "nes_mgt.h" + +extern int max_mtu; +#define max_frame_len (max_mtu+ETH_HLEN) +extern int interrupt_mod_interval; +extern int nes_if_count; +extern int mpa_version; +extern int disable_mpa_crc; +extern unsigned int send_first; +extern unsigned int nes_drv_opt; +extern unsigned int nes_debug_level; +extern unsigned int wqm_quanta; +extern struct list_head nes_adapter_list; + +extern atomic_t cm_connects; +extern atomic_t cm_accepts; +extern atomic_t cm_disconnects; +extern atomic_t cm_closes; +extern atomic_t cm_connecteds; +extern atomic_t cm_connect_reqs; +extern atomic_t cm_rejects; +extern atomic_t mod_qp_timouts; +extern atomic_t qps_created; +extern atomic_t qps_destroyed; +extern atomic_t sw_qps_destroyed; +extern u32 mh_detected; +extern u32 mh_pauses_sent; +extern u32 cm_packets_sent; +extern u32 cm_packets_bounced; +extern u32 cm_packets_created; +extern u32 cm_packets_received; +extern u32 cm_packets_dropped; +extern u32 cm_packets_retrans; +extern atomic_t cm_listens_created; +extern atomic_t cm_listens_destroyed; +extern u32 cm_backlog_drops; +extern atomic_t cm_loopbacks; +extern atomic_t cm_nodes_created; +extern atomic_t cm_nodes_destroyed; +extern atomic_t cm_accel_dropped_pkts; +extern atomic_t cm_resets_recvd; +extern atomic_t pau_qps_created; +extern atomic_t pau_qps_destroyed; + +extern u32 int_mod_timer_init; +extern u32 int_mod_cq_depth_256; +extern u32 int_mod_cq_depth_128; +extern u32 int_mod_cq_depth_32; +extern u32 int_mod_cq_depth_24; +extern u32 int_mod_cq_depth_16; +extern u32 int_mod_cq_depth_4; +extern u32 int_mod_cq_depth_1; + +struct nes_device { + struct nes_adapter *nesadapter; + void __iomem *regs; + void __iomem *index_reg; + struct pci_dev *pcidev; + struct net_device *netdev[NES_NIC_MAX_NICS]; + u64 link_status_interrupts; + struct tasklet_struct dpc_tasklet; + spinlock_t indexed_regs_lock; + unsigned long csr_start; + unsigned long doorbell_region; + unsigned long doorbell_start; + unsigned long mac_tx_errors; + unsigned long mac_pause_frames_sent; + unsigned long mac_pause_frames_received; + unsigned long mac_rx_errors; + unsigned long mac_rx_crc_errors; + unsigned long mac_rx_symbol_err_frames; + unsigned long mac_rx_jabber_frames; + unsigned long mac_rx_oversized_frames; + unsigned long mac_rx_short_frames; + unsigned long port_rx_discards; + unsigned long port_tx_discards; + unsigned int mac_index; + unsigned int nes_stack_start; + + /* Control Structures */ + void *cqp_vbase; + dma_addr_t cqp_pbase; + u32 cqp_mem_size; + u8 ceq_index; + u8 nic_ceq_index; + struct nes_hw_cqp cqp; + struct nes_hw_cq ccq; + struct list_head cqp_avail_reqs; + struct list_head cqp_pending_reqs; + struct nes_cqp_request *nes_cqp_requests; + + u32 int_req; + u32 int_stat; + u32 timer_int_req; + u32 timer_only_int_count; + u32 intf_int_req; + u32 last_mac_tx_pauses; + u32 last_used_chunks_tx; + struct list_head list; + + u16 base_doorbell_index; + u16 currcq_count; + u16 deepcq_count; + u8 iw_status; + u8 msi_enabled; + u8 netdev_count; + u8 napi_isr_ran; + u8 disable_rx_flow_control; + u8 disable_tx_flow_control; + + struct delayed_work work; + u8 link_recheck; +}; + +/* Receive skb private area - must fit in skb->cb area */ +struct nes_rskb_cb { + u64 busaddr; + u32 maplen; + u32 seqnum; + u8 *data_start; + struct nes_qp *nesqp; +}; + +static inline __le32 get_crc_value(struct nes_v4_quad *nes_quad) +{ + u32 crc_value; + crc_value = crc32c(~0, (void *)nes_quad, sizeof (struct nes_v4_quad)); + + /* + * With commit ef19454b ("[LIB] crc32c: Keep intermediate crc + * state in cpu order"), behavior of crc32c changes on + * big-endian platforms. Our algorithm expects the previous + * behavior; otherwise we have RDMA connection establishment + * issue on big-endian. + */ + return cpu_to_le32(crc_value); +} + +static inline void +set_wqe_64bit_value(__le32 *wqe_words, u32 index, u64 value) +{ + wqe_words[index] = cpu_to_le32((u32) value); + wqe_words[index + 1] = cpu_to_le32(upper_32_bits(value)); +} + +static inline void +set_wqe_32bit_value(__le32 *wqe_words, u32 index, u32 value) +{ + wqe_words[index] = cpu_to_le32(value); +} + +static inline void +nes_fill_init_cqp_wqe(struct nes_hw_cqp_wqe *cqp_wqe, struct nes_device *nesdev) +{ + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_LEN_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_LOW_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PA_LOW_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PA_HIGH_IDX] = 0; +} + +static inline void +nes_fill_init_qp_wqe(struct nes_hw_qp_wqe *wqe, struct nes_qp *nesqp, u32 head) +{ + u32 value; + value = ((u32)((unsigned long) nesqp)) | head; + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX, + (u32)(upper_32_bits((unsigned long)(nesqp)))); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, value); +} + +/* Read from memory-mapped device */ +static inline u32 nes_read_indexed(struct nes_device *nesdev, u32 reg_index) +{ + unsigned long flags; + void __iomem *addr = nesdev->index_reg; + u32 value; + + spin_lock_irqsave(&nesdev->indexed_regs_lock, flags); + + writel(reg_index, addr); + value = readl((void __iomem *)addr + 4); + + spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags); + return value; +} + +static inline u32 nes_read32(const void __iomem *addr) +{ + return readl(addr); +} + +static inline u16 nes_read16(const void __iomem *addr) +{ + return readw(addr); +} + +static inline u8 nes_read8(const void __iomem *addr) +{ + return readb(addr); +} + +/* Write to memory-mapped device */ +static inline void nes_write_indexed(struct nes_device *nesdev, u32 reg_index, u32 val) +{ + unsigned long flags; + void __iomem *addr = nesdev->index_reg; + + spin_lock_irqsave(&nesdev->indexed_regs_lock, flags); + + writel(reg_index, addr); + writel(val, (void __iomem *)addr + 4); + + spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags); +} + +static inline void nes_write32(void __iomem *addr, u32 val) +{ + writel(val, addr); +} + +static inline void nes_write16(void __iomem *addr, u16 val) +{ + writew(val, addr); +} + +static inline void nes_write8(void __iomem *addr, u8 val) +{ + writeb(val, addr); +} + + + +static inline int nes_alloc_resource(struct nes_adapter *nesadapter, + unsigned long *resource_array, u32 max_resources, + u32 *req_resource_num, u32 *next) +{ + unsigned long flags; + u32 resource_num; + + spin_lock_irqsave(&nesadapter->resource_lock, flags); + + resource_num = find_next_zero_bit(resource_array, max_resources, *next); + if (resource_num >= max_resources) { + resource_num = find_first_zero_bit(resource_array, max_resources); + if (resource_num >= max_resources) { + printk(KERN_ERR PFX "%s: No available resourcess.\n", __func__); + spin_unlock_irqrestore(&nesadapter->resource_lock, flags); + return -EMFILE; + } + } + set_bit(resource_num, resource_array); + *next = resource_num+1; + if (*next == max_resources) { + *next = 0; + } + spin_unlock_irqrestore(&nesadapter->resource_lock, flags); + *req_resource_num = resource_num; + + return 0; +} + +static inline int nes_is_resource_allocated(struct nes_adapter *nesadapter, + unsigned long *resource_array, u32 resource_num) +{ + unsigned long flags; + int bit_is_set; + + spin_lock_irqsave(&nesadapter->resource_lock, flags); + + bit_is_set = test_bit(resource_num, resource_array); + nes_debug(NES_DBG_HW, "resource_num %u is%s allocated.\n", + resource_num, (bit_is_set ? "": " not")); + spin_unlock_irqrestore(&nesadapter->resource_lock, flags); + + return bit_is_set; +} + +static inline void nes_free_resource(struct nes_adapter *nesadapter, + unsigned long *resource_array, u32 resource_num) +{ + unsigned long flags; + + spin_lock_irqsave(&nesadapter->resource_lock, flags); + clear_bit(resource_num, resource_array); + spin_unlock_irqrestore(&nesadapter->resource_lock, flags); +} + +static inline struct nes_vnic *to_nesvnic(struct ib_device *ibdev) +{ + return container_of(ibdev, struct nes_ib_device, ibdev)->nesvnic; +} + +static inline struct nes_pd *to_nespd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct nes_pd, ibpd); +} + +static inline struct nes_ucontext *to_nesucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct nes_ucontext, ibucontext); +} + +static inline struct nes_mr *to_nesmr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct nes_mr, ibmr); +} + +static inline struct nes_mr *to_nesmr_from_ibfmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct nes_mr, ibfmr); +} + +static inline struct nes_mr *to_nesmw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct nes_mr, ibmw); +} + +static inline struct nes_fmr *to_nesfmr(struct nes_mr *nesmr) +{ + return container_of(nesmr, struct nes_fmr, nesmr); +} + +static inline struct nes_cq *to_nescq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct nes_cq, ibcq); +} + +static inline struct nes_qp *to_nesqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct nes_qp, ibqp); +} + + + +/* nes.c */ +void nes_add_ref(struct ib_qp *); +void nes_rem_ref(struct ib_qp *); +struct ib_qp *nes_get_qp(struct ib_device *, int); + + +/* nes_hw.c */ +struct nes_adapter *nes_init_adapter(struct nes_device *, u8); +void nes_nic_init_timer_defaults(struct nes_device *, u8); +void nes_destroy_adapter(struct nes_adapter *); +int nes_init_cqp(struct nes_device *); +int nes_init_phy(struct nes_device *); +int nes_init_nic_qp(struct nes_device *, struct net_device *); +void nes_destroy_nic_qp(struct nes_vnic *); +int nes_napi_isr(struct nes_device *); +void nes_dpc(unsigned long); +void nes_nic_ce_handler(struct nes_device *, struct nes_hw_nic_cq *); +void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *); +int nes_destroy_cqp(struct nes_device *); +int nes_nic_cm_xmit(struct sk_buff *, struct net_device *); +void nes_recheck_link_status(struct work_struct *work); + +/* nes_nic.c */ +struct net_device *nes_netdev_init(struct nes_device *, void __iomem *); +void nes_netdev_destroy(struct net_device *); +int nes_nic_cm_xmit(struct sk_buff *, struct net_device *); + +/* nes_cm.c */ +void *nes_cm_create(struct net_device *); +int nes_cm_recv(struct sk_buff *, struct net_device *); +void nes_update_arp(unsigned char *, u32, u32, u16, u16); +void nes_manage_arp_cache(struct net_device *, unsigned char *, u32, u32); +void nes_sock_release(struct nes_qp *, unsigned long *); +void flush_wqes(struct nes_device *nesdev, struct nes_qp *, u32, u32); +int nes_manage_apbvt(struct nes_vnic *, u32, u32, u32); +int nes_cm_disconn(struct nes_qp *); +void nes_cm_disconn_worker(void *); + +/* nes_verbs.c */ +int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32, u32); +int nes_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *); +struct nes_ib_device *nes_init_ofa_device(struct net_device *); +void nes_port_ibevent(struct nes_vnic *nesvnic); +void nes_destroy_ofa_device(struct nes_ib_device *); +int nes_register_ofa_device(struct nes_ib_device *); + +/* nes_util.c */ +int nes_read_eeprom_values(struct nes_device *, struct nes_adapter *); +void nes_write_1G_phy_reg(struct nes_device *, u8, u8, u16); +void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *); +void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16); +void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16); +struct nes_cqp_request *nes_get_cqp_request(struct nes_device *); +void nes_free_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request); +void nes_put_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request); +void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *); +int nes_arp_table(struct nes_device *, u32, u8 *, u32); +void nes_mh_fix(unsigned long); +void nes_clc(unsigned long); +void nes_dump_mem(unsigned int, void *, int); +u32 nes_crc32(u32, u32, u32, u32, u8 *, u32, u32, u32); + +#endif /* __NES_H */ diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c new file mode 100644 index 00000000..71edfbbc --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -0,0 +1,3903 @@ +/* + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + + +#define TCPOPT_TIMESTAMP 8 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nes.h" + +u32 cm_packets_sent; +u32 cm_packets_bounced; +u32 cm_packets_dropped; +u32 cm_packets_retrans; +u32 cm_packets_created; +u32 cm_packets_received; +atomic_t cm_listens_created; +atomic_t cm_listens_destroyed; +u32 cm_backlog_drops; +atomic_t cm_loopbacks; +atomic_t cm_nodes_created; +atomic_t cm_nodes_destroyed; +atomic_t cm_accel_dropped_pkts; +atomic_t cm_resets_recvd; + +static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *); +static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, struct nes_vnic *, struct nes_cm_info *); +static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *); +static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *, struct nes_vnic *, u16, void *, struct nes_cm_info *); +static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *); +static int mini_cm_accept(struct nes_cm_core *, struct nes_cm_node *); +static int mini_cm_reject(struct nes_cm_core *, struct nes_cm_node *); +static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *); +static int mini_cm_dealloc_core(struct nes_cm_core *); +static int mini_cm_get(struct nes_cm_core *); +static int mini_cm_set(struct nes_cm_core *, u32, u32); + +static void form_cm_frame(struct sk_buff *, struct nes_cm_node *, void *, u32, void *, u32, u8); +static int add_ref_cm_node(struct nes_cm_node *); +static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *); + +static int nes_cm_disconn_true(struct nes_qp *); +static int nes_cm_post_event(struct nes_cm_event *event); +static int nes_disconnect(struct nes_qp *nesqp, int abrupt); +static void nes_disconnect_worker(struct work_struct *work); + +static int send_mpa_request(struct nes_cm_node *, struct sk_buff *); +static int send_mpa_reject(struct nes_cm_node *); +static int send_syn(struct nes_cm_node *, u32, struct sk_buff *); +static int send_reset(struct nes_cm_node *, struct sk_buff *); +static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb); +static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb); +static void process_packet(struct nes_cm_node *, struct sk_buff *, struct nes_cm_core *); + +static void active_open_err(struct nes_cm_node *, struct sk_buff *, int); +static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int); +static void cleanup_retrans_entry(struct nes_cm_node *); +static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *); +static void free_retrans_entry(struct nes_cm_node *cm_node); +static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, struct sk_buff *skb, int optionsize, int passive); + +/* CM event handler functions */ +static void cm_event_connected(struct nes_cm_event *); +static void cm_event_connect_error(struct nes_cm_event *); +static void cm_event_reset(struct nes_cm_event *); +static void cm_event_mpa_req(struct nes_cm_event *); +static void cm_event_mpa_reject(struct nes_cm_event *); +static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node); + +/* MPA build functions */ +static int cm_build_mpa_frame(struct nes_cm_node *, u8 **, u16 *, u8 *, u8); +static void build_mpa_v2(struct nes_cm_node *, void *, u8); +static void build_mpa_v1(struct nes_cm_node *, void *, u8); +static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **); + +static void print_core(struct nes_cm_core *core); + +/* External CM API Interface */ +/* instance of function pointers for client API */ +/* set address of this instance to cm_core->cm_ops at cm_core alloc */ +static struct nes_cm_ops nes_cm_api = { + mini_cm_accelerated, + mini_cm_listen, + mini_cm_del_listen, + mini_cm_connect, + mini_cm_close, + mini_cm_accept, + mini_cm_reject, + mini_cm_recv_pkt, + mini_cm_dealloc_core, + mini_cm_get, + mini_cm_set +}; + +static struct nes_cm_core *g_cm_core; + +atomic_t cm_connects; +atomic_t cm_accepts; +atomic_t cm_disconnects; +atomic_t cm_closes; +atomic_t cm_connecteds; +atomic_t cm_connect_reqs; +atomic_t cm_rejects; + +int nes_add_ref_cm_node(struct nes_cm_node *cm_node) +{ + return add_ref_cm_node(cm_node); +} + +int nes_rem_ref_cm_node(struct nes_cm_node *cm_node) +{ + return rem_ref_cm_node(cm_node->cm_core, cm_node); +} + +/** + * create_event + */ +static struct nes_cm_event *create_event(struct nes_cm_node * cm_node, + enum nes_cm_event_type type) +{ + struct nes_cm_event *event; + + if (!cm_node->cm_id) + return NULL; + + /* allocate an empty event */ + event = kzalloc(sizeof(*event), GFP_ATOMIC); + + if (!event) + return NULL; + + event->type = type; + event->cm_node = cm_node; + event->cm_info.rem_addr = cm_node->rem_addr; + event->cm_info.loc_addr = cm_node->loc_addr; + event->cm_info.rem_port = cm_node->rem_port; + event->cm_info.loc_port = cm_node->loc_port; + event->cm_info.cm_id = cm_node->cm_id; + + nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, " + "dst_addr=%08x[%x], src_addr=%08x[%x]\n", + cm_node, event, type, event->cm_info.loc_addr, + event->cm_info.loc_port, event->cm_info.rem_addr, + event->cm_info.rem_port); + + nes_cm_post_event(event); + return event; +} + + +/** + * send_mpa_request + */ +static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb) +{ + u8 start_addr = 0; + u8 *start_ptr = &start_addr; + u8 **start_buff = &start_ptr; + u16 buff_len = 0; + + if (!skb) { + nes_debug(NES_DBG_CM, "skb set to NULL\n"); + return -1; + } + + /* send an MPA Request frame */ + cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REQUEST); + form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK); + + return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); +} + + + +static int send_mpa_reject(struct nes_cm_node *cm_node) +{ + struct sk_buff *skb = NULL; + u8 start_addr = 0; + u8 *start_ptr = &start_addr; + u8 **start_buff = &start_ptr; + u16 buff_len = 0; + struct ietf_mpa_v1 *mpa_frame; + + skb = dev_alloc_skb(MAX_CM_BUFFER); + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -ENOMEM; + } + + /* send an MPA reject frame */ + cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REPLY); + mpa_frame = (struct ietf_mpa_v1 *)*start_buff; + mpa_frame->flags |= IETF_MPA_FLAGS_REJECT; + form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK | SET_FIN); + + cm_node->state = NES_CM_STATE_FIN_WAIT1; + return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); +} + + +/** + * recv_mpa - process a received TCP pkt, we are expecting an + * IETF MPA frame + */ +static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, + u32 len) +{ + struct ietf_mpa_v1 *mpa_frame; + struct ietf_mpa_v2 *mpa_v2_frame; + struct ietf_rtr_msg *rtr_msg; + int mpa_hdr_len; + int priv_data_len; + + *type = NES_MPA_REQUEST_ACCEPT; + + /* assume req frame is in tcp data payload */ + if (len < sizeof(struct ietf_mpa_v1)) { + nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len); + return -EINVAL; + } + + /* points to the beginning of the frame, which could be MPA V1 or V2 */ + mpa_frame = (struct ietf_mpa_v1 *)buffer; + mpa_hdr_len = sizeof(struct ietf_mpa_v1); + priv_data_len = ntohs(mpa_frame->priv_data_len); + + /* make sure mpa private data len is less than 512 bytes */ + if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) { + nes_debug(NES_DBG_CM, "The received Length of Private" + " Data field exceeds 512 octets\n"); + return -EINVAL; + } + /* + * make sure MPA receiver interoperate with the + * received MPA version and MPA key information + * + */ + if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) { + nes_debug(NES_DBG_CM, "The received mpa version" + " is not supported\n"); + return -EINVAL; + } + /* + * backwards compatibility only + */ + if (mpa_frame->rev > cm_node->mpa_frame_rev) { + nes_debug(NES_DBG_CM, "The received mpa version" + " can not be interoperated\n"); + return -EINVAL; + } else { + cm_node->mpa_frame_rev = mpa_frame->rev; + } + + if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) { + if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) { + nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n"); + return -EINVAL; + } + } else { + if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) { + nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n"); + return -EINVAL; + } + } + + + if (priv_data_len + mpa_hdr_len != len) { + nes_debug(NES_DBG_CM, "The received ietf buffer was not right" + " complete (%x + %x != %x)\n", + priv_data_len, mpa_hdr_len, len); + return -EINVAL; + } + /* make sure it does not exceed the max size */ + if (len > MAX_CM_BUFFER) { + nes_debug(NES_DBG_CM, "The received ietf buffer was too large" + " (%x + %x != %x)\n", + priv_data_len, mpa_hdr_len, len); + return -EINVAL; + } + + cm_node->mpa_frame_size = priv_data_len; + + switch (mpa_frame->rev) { + case IETF_MPA_V2: { + u16 ird_size; + u16 ord_size; + u16 rtr_ctrl_ird; + u16 rtr_ctrl_ord; + + mpa_v2_frame = (struct ietf_mpa_v2 *)buffer; + mpa_hdr_len += IETF_RTR_MSG_SIZE; + cm_node->mpa_frame_size -= IETF_RTR_MSG_SIZE; + rtr_msg = &mpa_v2_frame->rtr_msg; + + /* parse rtr message */ + rtr_ctrl_ird = ntohs(rtr_msg->ctrl_ird); + rtr_ctrl_ord = ntohs(rtr_msg->ctrl_ord); + ird_size = rtr_ctrl_ird & IETF_NO_IRD_ORD; + ord_size = rtr_ctrl_ord & IETF_NO_IRD_ORD; + + if (!(rtr_ctrl_ird & IETF_PEER_TO_PEER)) { + /* send reset */ + return -EINVAL; + } + + if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) { + /* responder */ + if (cm_node->ord_size > ird_size) + cm_node->ord_size = ird_size; + } else { + /* initiator */ + if (cm_node->ord_size > ird_size) + cm_node->ord_size = ird_size; + + if (cm_node->ird_size < ord_size) { + /* no resources available */ + /* send terminate message */ + return -EINVAL; + } + } + + if (rtr_ctrl_ord & IETF_RDMA0_READ) { + cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; + } else if (rtr_ctrl_ord & IETF_RDMA0_WRITE) { + cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO; + } else { /* Not supported RDMA0 operation */ + return -EINVAL; + } + break; + } + case IETF_MPA_V1: + default: + break; + } + + /* copy entire MPA frame to our cm_node's frame */ + memcpy(cm_node->mpa_frame_buf, buffer + mpa_hdr_len, cm_node->mpa_frame_size); + + if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT) + *type = NES_MPA_REQUEST_REJECT; + return 0; +} + + +/** + * form_cm_frame - get a free packet and build empty frame Use + * node info to build. + */ +static void form_cm_frame(struct sk_buff *skb, + struct nes_cm_node *cm_node, void *options, u32 optionsize, + void *data, u32 datasize, u8 flags) +{ + struct tcphdr *tcph; + struct iphdr *iph; + struct ethhdr *ethh; + u8 *buf; + u16 packetsize = sizeof(*iph); + + packetsize += sizeof(*tcph); + packetsize += optionsize + datasize; + + skb_trim(skb, 0); + memset(skb->data, 0x00, ETH_HLEN + sizeof(*iph) + sizeof(*tcph)); + + buf = skb_put(skb, packetsize + ETH_HLEN); + + ethh = (struct ethhdr *)buf; + buf += ETH_HLEN; + + iph = (struct iphdr *)buf; + buf += sizeof(*iph); + tcph = (struct tcphdr *)buf; + skb_reset_mac_header(skb); + skb_set_network_header(skb, ETH_HLEN); + skb_set_transport_header(skb, ETH_HLEN + sizeof(*iph)); + buf += sizeof(*tcph); + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->protocol = htons(0x800); + skb->data_len = 0; + skb->mac_len = ETH_HLEN; + + memcpy(ethh->h_dest, cm_node->rem_mac, ETH_ALEN); + memcpy(ethh->h_source, cm_node->loc_mac, ETH_ALEN); + ethh->h_proto = htons(0x0800); + + iph->version = IPVERSION; + iph->ihl = 5; /* 5 * 4Byte words, IP headr len */ + iph->tos = 0; + iph->tot_len = htons(packetsize); + iph->id = htons(++cm_node->tcp_cntxt.loc_id); + + iph->frag_off = htons(0x4000); + iph->ttl = 0x40; + iph->protocol = 0x06; /* IPPROTO_TCP */ + + iph->saddr = htonl(cm_node->loc_addr); + iph->daddr = htonl(cm_node->rem_addr); + + tcph->source = htons(cm_node->loc_port); + tcph->dest = htons(cm_node->rem_port); + tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num); + + if (flags & SET_ACK) { + cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt; + tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num); + tcph->ack = 1; + } else { + tcph->ack_seq = 0; + } + + if (flags & SET_SYN) { + cm_node->tcp_cntxt.loc_seq_num++; + tcph->syn = 1; + } else { + cm_node->tcp_cntxt.loc_seq_num += datasize; + } + + if (flags & SET_FIN) { + cm_node->tcp_cntxt.loc_seq_num++; + tcph->fin = 1; + } + + if (flags & SET_RST) + tcph->rst = 1; + + tcph->doff = (u16)((sizeof(*tcph) + optionsize + 3) >> 2); + tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd); + tcph->urg_ptr = 0; + if (optionsize) + memcpy(buf, options, optionsize); + buf += optionsize; + if (datasize) + memcpy(buf, data, datasize); + + skb_shinfo(skb)->nr_frags = 0; + cm_packets_created++; +} + +/** + * print_core - dump a cm core + */ +static void print_core(struct nes_cm_core *core) +{ + nes_debug(NES_DBG_CM, "---------------------------------------------\n"); + nes_debug(NES_DBG_CM, "CM Core -- (core = %p )\n", core); + if (!core) + return; + nes_debug(NES_DBG_CM, "---------------------------------------------\n"); + + nes_debug(NES_DBG_CM, "State : %u \n", core->state); + + nes_debug(NES_DBG_CM, "Listen Nodes : %u \n", atomic_read(&core->listen_node_cnt)); + nes_debug(NES_DBG_CM, "Active Nodes : %u \n", atomic_read(&core->node_cnt)); + + nes_debug(NES_DBG_CM, "core : %p \n", core); + + nes_debug(NES_DBG_CM, "-------------- end core ---------------\n"); +} + +/** + * cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame + */ +static int cm_build_mpa_frame(struct nes_cm_node *cm_node, u8 **start_buff, + u16 *buff_len, u8 *pci_mem, u8 mpa_key) +{ + int ret = 0; + + *start_buff = (pci_mem) ? pci_mem : &cm_node->mpa_frame_buf[0]; + + switch (cm_node->mpa_frame_rev) { + case IETF_MPA_V1: + *start_buff = (u8 *)*start_buff + sizeof(struct ietf_rtr_msg); + *buff_len = sizeof(struct ietf_mpa_v1) + cm_node->mpa_frame_size; + build_mpa_v1(cm_node, *start_buff, mpa_key); + break; + case IETF_MPA_V2: + *buff_len = sizeof(struct ietf_mpa_v2) + cm_node->mpa_frame_size; + build_mpa_v2(cm_node, *start_buff, mpa_key); + break; + default: + ret = -EINVAL; + } + return ret; +} + +/** + * build_mpa_v2 - build a MPA V2 frame + */ +static void build_mpa_v2(struct nes_cm_node *cm_node, + void *start_addr, u8 mpa_key) +{ + struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr; + struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg; + u16 ctrl_ird; + u16 ctrl_ord; + + /* initialize the upper 5 bytes of the frame */ + build_mpa_v1(cm_node, start_addr, mpa_key); + mpa_frame->flags |= IETF_MPA_V2_FLAG; /* set a bit to indicate MPA V2 */ + mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE); + + /* initialize RTR msg */ + ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ? + IETF_NO_IRD_ORD : cm_node->ird_size; + ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ? + IETF_NO_IRD_ORD : cm_node->ord_size; + + ctrl_ird |= IETF_PEER_TO_PEER; + ctrl_ird |= IETF_FLPDU_ZERO_LEN; + + switch (mpa_key) { + case MPA_KEY_REQUEST: + ctrl_ord |= IETF_RDMA0_WRITE; + ctrl_ord |= IETF_RDMA0_READ; + break; + case MPA_KEY_REPLY: + switch (cm_node->send_rdma0_op) { + case SEND_RDMA_WRITE_ZERO: + ctrl_ord |= IETF_RDMA0_WRITE; + break; + case SEND_RDMA_READ_ZERO: + ctrl_ord |= IETF_RDMA0_READ; + break; + } + } + rtr_msg->ctrl_ird = htons(ctrl_ird); + rtr_msg->ctrl_ord = htons(ctrl_ord); +} + +/** + * build_mpa_v1 - build a MPA V1 frame + */ +static void build_mpa_v1(struct nes_cm_node *cm_node, void *start_addr, u8 mpa_key) +{ + struct ietf_mpa_v1 *mpa_frame = (struct ietf_mpa_v1 *)start_addr; + + switch (mpa_key) { + case MPA_KEY_REQUEST: + memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE); + break; + case MPA_KEY_REPLY: + memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); + break; + } + mpa_frame->flags = IETF_MPA_FLAGS_CRC; + mpa_frame->rev = cm_node->mpa_frame_rev; + mpa_frame->priv_data_len = htons(cm_node->mpa_frame_size); +} + +static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_addr) +{ + u64 u64temp; + struct nes_qp *nesqp = *nesqp_addr; + struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0]; + + u64temp = (unsigned long)nesqp; + u64temp |= NES_SW_CONTEXT_ALIGN >> 1; + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp); + + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0; + + switch (cm_node->send_rdma0_op) { + case SEND_RDMA_WRITE_ZERO: + nes_debug(NES_DBG_CM, "Sending first write.\n"); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_OP_RDMAW); + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; + break; + + case SEND_RDMA_READ_ZERO: + default: + if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO) { + printk(KERN_ERR "%s[%u]: Unsupported RDMA0 len operation=%u\n", + __func__, __LINE__, cm_node->send_rdma0_op); + WARN_ON(1); + } + nes_debug(NES_DBG_CM, "Sending first rdma operation.\n"); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_OP_RDMAR); + wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX] = 1; + wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_STAG_IDX] = 1; + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 1; + break; + } + + if (nesqp->sq_kmapped) { + nesqp->sq_kmapped = 0; + kunmap(nesqp->page); + } + + /*use the reserved spot on the WQ for the extra first WQE*/ + nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU | + NES_QPCONTEXT_ORDIRD_ALSMM)); + nesqp->skip_lsmm = 1; + nesqp->hwqp.sq_tail = 0; +} + +/** + * schedule_nes_timer + * note - cm_node needs to be protected before calling this. Encase in: + * rem_ref_cm_node(cm_core, cm_node);add_ref_cm_node(cm_node); + */ +int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, + enum nes_timer_type type, int send_retrans, + int close_when_complete) +{ + unsigned long flags; + struct nes_cm_core *cm_core = cm_node->cm_core; + struct nes_timer_entry *new_send; + int ret = 0; + u32 was_timer_set; + + new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); + if (!new_send) + return -ENOMEM; + + /* new_send->timetosend = currenttime */ + new_send->retrycount = NES_DEFAULT_RETRYS; + new_send->retranscount = NES_DEFAULT_RETRANS; + new_send->skb = skb; + new_send->timetosend = jiffies; + new_send->type = type; + new_send->netdev = cm_node->netdev; + new_send->send_retrans = send_retrans; + new_send->close_when_complete = close_when_complete; + + if (type == NES_TIMER_TYPE_CLOSE) { + new_send->timetosend += (HZ / 10); + if (cm_node->recv_entry) { + kfree(new_send); + WARN_ON(1); + return -EINVAL; + } + cm_node->recv_entry = new_send; + } + + if (type == NES_TIMER_TYPE_SEND) { + new_send->seq_num = ntohl(tcp_hdr(skb)->seq); + atomic_inc(&new_send->skb->users); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + cm_node->send_entry = new_send; + add_ref_cm_node(cm_node); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + new_send->timetosend = jiffies + NES_RETRY_TIMEOUT; + + ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev); + if (ret != NETDEV_TX_OK) { + nes_debug(NES_DBG_CM, "Error sending packet %p " + "(jiffies = %lu)\n", new_send, jiffies); + new_send->timetosend = jiffies; + ret = NETDEV_TX_OK; + } else { + cm_packets_sent++; + if (!send_retrans) { + cleanup_retrans_entry(cm_node); + if (close_when_complete) + rem_ref_cm_node(cm_core, cm_node); + return ret; + } + } + } + + was_timer_set = timer_pending(&cm_core->tcp_timer); + + if (!was_timer_set) { + cm_core->tcp_timer.expires = new_send->timetosend; + add_timer(&cm_core->tcp_timer); + } + + return ret; +} + +static void nes_retrans_expired(struct nes_cm_node *cm_node) +{ + struct iw_cm_id *cm_id = cm_node->cm_id; + enum nes_cm_node_state state = cm_node->state; + cm_node->state = NES_CM_STATE_CLOSED; + + switch (state) { + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_CLOSING: + rem_ref_cm_node(cm_node->cm_core, cm_node); + break; + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_FIN_WAIT1: + if (cm_node->cm_id) + cm_id->rem_ref(cm_id); + send_reset(cm_node, NULL); + break; + default: + add_ref_cm_node(cm_node); + send_reset(cm_node, NULL); + create_event(cm_node, NES_CM_EVENT_ABORTED); + } +} + +static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node) +{ + struct nes_timer_entry *recv_entry = cm_node->recv_entry; + struct iw_cm_id *cm_id = cm_node->cm_id; + struct nes_qp *nesqp; + unsigned long qplockflags; + + if (!recv_entry) + return; + nesqp = (struct nes_qp *)recv_entry->skb; + if (nesqp) { + spin_lock_irqsave(&nesqp->lock, qplockflags); + if (nesqp->cm_id) { + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " + "refcount = %d: HIT A " + "NES_TIMER_TYPE_CLOSE with something " + "to do!!!\n", nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; + nesqp->ibqp_state = IB_QPS_ERR; + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_cm_disconn(nesqp); + } else { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " + "refcount = %d: HIT A " + "NES_TIMER_TYPE_CLOSE with nothing " + "to do!!!\n", nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); + } + } else if (rem_node) { + /* TIME_WAIT state */ + rem_ref_cm_node(cm_node->cm_core, cm_node); + } + if (cm_node->cm_id) + cm_id->rem_ref(cm_id); + kfree(recv_entry); + cm_node->recv_entry = NULL; +} + +/** + * nes_cm_timer_tick + */ +static void nes_cm_timer_tick(unsigned long pass) +{ + unsigned long flags; + unsigned long nexttimeout = jiffies + NES_LONG_TIME; + struct nes_cm_node *cm_node; + struct nes_timer_entry *send_entry, *recv_entry; + struct list_head *list_core_temp; + struct list_head *list_node; + struct nes_cm_core *cm_core = g_cm_core; + u32 settimer = 0; + unsigned long timetosend; + int ret = NETDEV_TX_OK; + + struct list_head timer_list; + + INIT_LIST_HEAD(&timer_list); + spin_lock_irqsave(&cm_core->ht_lock, flags); + + list_for_each_safe(list_node, list_core_temp, + &cm_core->connected_nodes) { + cm_node = container_of(list_node, struct nes_cm_node, list); + if ((cm_node->recv_entry) || (cm_node->send_entry)) { + add_ref_cm_node(cm_node); + list_add(&cm_node->timer_entry, &timer_list); + } + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + + list_for_each_safe(list_node, list_core_temp, &timer_list) { + cm_node = container_of(list_node, struct nes_cm_node, + timer_entry); + recv_entry = cm_node->recv_entry; + + if (recv_entry) { + if (time_after(recv_entry->timetosend, jiffies)) { + if (nexttimeout > recv_entry->timetosend || + !settimer) { + nexttimeout = recv_entry->timetosend; + settimer = 1; + } + } else { + handle_recv_entry(cm_node, 1); + } + } + + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + do { + send_entry = cm_node->send_entry; + if (!send_entry) + break; + if (time_after(send_entry->timetosend, jiffies)) { + if (cm_node->state != NES_CM_STATE_TSA) { + if ((nexttimeout > + send_entry->timetosend) || + !settimer) { + nexttimeout = + send_entry->timetosend; + settimer = 1; + } + } else { + free_retrans_entry(cm_node); + } + break; + } + + if ((cm_node->state == NES_CM_STATE_TSA) || + (cm_node->state == NES_CM_STATE_CLOSED)) { + free_retrans_entry(cm_node); + break; + } + + if (!send_entry->retranscount || + !send_entry->retrycount) { + cm_packets_dropped++; + free_retrans_entry(cm_node); + + spin_unlock_irqrestore( + &cm_node->retrans_list_lock, flags); + nes_retrans_expired(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + spin_lock_irqsave(&cm_node->retrans_list_lock, + flags); + break; + } + atomic_inc(&send_entry->skb->users); + cm_packets_retrans++; + nes_debug(NES_DBG_CM, "Retransmitting send_entry %p " + "for node %p, jiffies = %lu, time to send = " + "%lu, retranscount = %u, send_entry->seq_num = " + "0x%08X, cm_node->tcp_cntxt.rem_ack_num = " + "0x%08X\n", send_entry, cm_node, jiffies, + send_entry->timetosend, + send_entry->retranscount, + send_entry->seq_num, + cm_node->tcp_cntxt.rem_ack_num); + + spin_unlock_irqrestore(&cm_node->retrans_list_lock, + flags); + ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + if (ret != NETDEV_TX_OK) { + nes_debug(NES_DBG_CM, "rexmit failed for " + "node=%p\n", cm_node); + cm_packets_bounced++; + send_entry->retrycount--; + nexttimeout = jiffies + NES_SHORT_TIME; + settimer = 1; + break; + } else { + cm_packets_sent++; + } + nes_debug(NES_DBG_CM, "Packet Sent: retrans count = " + "%u, retry count = %u.\n", + send_entry->retranscount, + send_entry->retrycount); + if (send_entry->send_retrans) { + send_entry->retranscount--; + timetosend = (NES_RETRY_TIMEOUT << + (NES_DEFAULT_RETRANS - send_entry->retranscount)); + + send_entry->timetosend = jiffies + + min(timetosend, NES_MAX_TIMEOUT); + if (nexttimeout > send_entry->timetosend || + !settimer) { + nexttimeout = send_entry->timetosend; + settimer = 1; + } + } else { + int close_when_complete; + close_when_complete = + send_entry->close_when_complete; + nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n", + cm_node, cm_node->state); + free_retrans_entry(cm_node); + if (close_when_complete) + rem_ref_cm_node(cm_node->cm_core, + cm_node); + } + } while (0); + + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + rem_ref_cm_node(cm_node->cm_core, cm_node); + } + + if (settimer) { + if (!timer_pending(&cm_core->tcp_timer)) { + cm_core->tcp_timer.expires = nexttimeout; + add_timer(&cm_core->tcp_timer); + } + } +} + + +/** + * send_syn + */ +static int send_syn(struct nes_cm_node *cm_node, u32 sendack, + struct sk_buff *skb) +{ + int ret; + int flags = SET_SYN; + char optionsbuffer[sizeof(struct option_mss) + + sizeof(struct option_windowscale) + sizeof(struct option_base) + + TCP_OPTIONS_PADDING]; + + int optionssize = 0; + /* Sending MSS option */ + union all_known_options *options; + + if (!cm_node) + return -EINVAL; + + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_mss.optionnum = OPTION_NUMBER_MSS; + options->as_mss.length = sizeof(struct option_mss); + options->as_mss.mss = htons(cm_node->tcp_cntxt.mss); + optionssize += sizeof(struct option_mss); + + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE; + options->as_windowscale.length = sizeof(struct option_windowscale); + options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale; + optionssize += sizeof(struct option_windowscale); + + if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt)) { + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_base.optionnum = OPTION_NUMBER_WRITE0; + options->as_base.length = sizeof(struct option_base); + optionssize += sizeof(struct option_base); + /* we need the size to be a multiple of 4 */ + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_end = 1; + optionssize += 1; + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_end = 1; + optionssize += 1; + } + + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_end = OPTION_NUMBER_END; + optionssize += 1; + + if (!skb) + skb = dev_alloc_skb(MAX_CM_BUFFER); + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -1; + } + + if (sendack) + flags |= SET_ACK; + + form_cm_frame(skb, cm_node, optionsbuffer, optionssize, NULL, 0, flags); + ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); + + return ret; +} + + +/** + * send_reset + */ +static int send_reset(struct nes_cm_node *cm_node, struct sk_buff *skb) +{ + int ret; + int flags = SET_RST | SET_ACK; + + if (!skb) + skb = dev_alloc_skb(MAX_CM_BUFFER); + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -ENOMEM; + } + + form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, flags); + ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 1); + + return ret; +} + + +/** + * send_ack + */ +static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb) +{ + int ret; + + if (!skb) + skb = dev_alloc_skb(MAX_CM_BUFFER); + + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -1; + } + + form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, SET_ACK); + ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 0); + + return ret; +} + + +/** + * send_fin + */ +static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb) +{ + int ret; + + /* if we didn't get a frame get one */ + if (!skb) + skb = dev_alloc_skb(MAX_CM_BUFFER); + + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -1; + } + + form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, SET_ACK | SET_FIN); + ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); + + return ret; +} + + +/** + * find_node - find a cm node that matches the reference cm node + */ +static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, + u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr) +{ + unsigned long flags; + struct list_head *hte; + struct nes_cm_node *cm_node; + + /* get a handle on the hte */ + hte = &cm_core->connected_nodes; + + /* walk list and find cm_node associated with this session ID */ + spin_lock_irqsave(&cm_core->ht_lock, flags); + list_for_each_entry(cm_node, hte, list) { + /* compare quad, return node handle if a match */ + nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? %x:%x\n", + cm_node->loc_addr, cm_node->loc_port, + loc_addr, loc_port, + cm_node->rem_addr, cm_node->rem_port, + rem_addr, rem_port); + if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) && + (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) { + add_ref_cm_node(cm_node); + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + return cm_node; + } + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + + /* no owner node */ + return NULL; +} + + +/** + * find_listener - find a cm node listening on this addr-port pair + */ +static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, + nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state) +{ + unsigned long flags; + struct nes_cm_listener *listen_node; + + /* walk list and find cm_node associated with this session ID */ + spin_lock_irqsave(&cm_core->listen_list_lock, flags); + list_for_each_entry(listen_node, &cm_core->listen_list.list, list) { + /* compare node pair, return node handle if a match */ + if (((listen_node->loc_addr == dst_addr) || + listen_node->loc_addr == 0x00000000) && + (listen_node->loc_port == dst_port) && + (listener_state & listen_node->listener_state)) { + atomic_inc(&listen_node->ref_count); + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + return listen_node; + } + } + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + + /* no listener */ + return NULL; +} + + +/** + * add_hte_node - add a cm node to the hash table + */ +static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) +{ + unsigned long flags; + struct list_head *hte; + + if (!cm_node || !cm_core) + return -EINVAL; + + nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n", + cm_node); + + spin_lock_irqsave(&cm_core->ht_lock, flags); + + /* get a handle on the hash table element (list head for this slot) */ + hte = &cm_core->connected_nodes; + list_add_tail(&cm_node->list, hte); + atomic_inc(&cm_core->ht_node_cnt); + + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + + return 0; +} + + +/** + * mini_cm_dec_refcnt_listen + */ +static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, + struct nes_cm_listener *listener, int free_hanging_nodes) +{ + int ret = -EINVAL; + int err = 0; + unsigned long flags; + struct list_head *list_pos = NULL; + struct list_head *list_temp = NULL; + struct nes_cm_node *cm_node = NULL; + struct list_head reset_list; + + nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, " + "refcnt=%d\n", listener, free_hanging_nodes, + atomic_read(&listener->ref_count)); + /* free non-accelerated child nodes for this listener */ + INIT_LIST_HEAD(&reset_list); + if (free_hanging_nodes) { + spin_lock_irqsave(&cm_core->ht_lock, flags); + list_for_each_safe(list_pos, list_temp, + &g_cm_core->connected_nodes) { + cm_node = container_of(list_pos, struct nes_cm_node, + list); + if ((cm_node->listener == listener) && + (!cm_node->accelerated)) { + add_ref_cm_node(cm_node); + list_add(&cm_node->reset_entry, &reset_list); + } + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + } + + list_for_each_safe(list_pos, list_temp, &reset_list) { + cm_node = container_of(list_pos, struct nes_cm_node, + reset_entry); + { + struct nes_cm_node *loopback = cm_node->loopbackpartner; + enum nes_cm_node_state old_state; + if (NES_CM_STATE_FIN_WAIT1 <= cm_node->state) { + rem_ref_cm_node(cm_node->cm_core, cm_node); + } else { + if (!loopback) { + cleanup_retrans_entry(cm_node); + err = send_reset(cm_node, NULL); + if (err) { + cm_node->state = + NES_CM_STATE_CLOSED; + WARN_ON(1); + } else { + old_state = cm_node->state; + cm_node->state = NES_CM_STATE_LISTENER_DESTROYED; + if (old_state != NES_CM_STATE_MPAREQ_RCVD) + rem_ref_cm_node( + cm_node->cm_core, + cm_node); + } + } else { + struct nes_cm_event event; + + event.cm_node = loopback; + event.cm_info.rem_addr = + loopback->rem_addr; + event.cm_info.loc_addr = + loopback->loc_addr; + event.cm_info.rem_port = + loopback->rem_port; + event.cm_info.loc_port = + loopback->loc_port; + event.cm_info.cm_id = loopback->cm_id; + add_ref_cm_node(loopback); + loopback->state = NES_CM_STATE_CLOSED; + cm_event_connect_error(&event); + cm_node->state = NES_CM_STATE_LISTENER_DESTROYED; + + rem_ref_cm_node(cm_node->cm_core, + cm_node); + + } + } + } + } + + spin_lock_irqsave(&cm_core->listen_list_lock, flags); + if (!atomic_dec_return(&listener->ref_count)) { + list_del(&listener->list); + + /* decrement our listen node count */ + atomic_dec(&cm_core->listen_node_cnt); + + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + + if (listener->nesvnic) + nes_manage_apbvt(listener->nesvnic, listener->loc_port, + PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); + + nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener); + + kfree(listener); + listener = NULL; + ret = 0; + atomic_inc(&cm_listens_destroyed); + } else { + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + } + if (listener) { + if (atomic_read(&listener->pend_accepts_cnt) > 0) + nes_debug(NES_DBG_CM, "destroying listener (%p)" + " with non-zero pending accepts=%u\n", + listener, atomic_read(&listener->pend_accepts_cnt)); + } + + return ret; +} + + +/** + * mini_cm_del_listen + */ +static int mini_cm_del_listen(struct nes_cm_core *cm_core, + struct nes_cm_listener *listener) +{ + listener->listener_state = NES_CM_LISTENER_PASSIVE_STATE; + listener->cm_id = NULL; /* going to be destroyed pretty soon */ + return mini_cm_dec_refcnt_listen(cm_core, listener, 1); +} + + +/** + * mini_cm_accelerated + */ +static inline int mini_cm_accelerated(struct nes_cm_core *cm_core, + struct nes_cm_node *cm_node) +{ + u32 was_timer_set; + + cm_node->accelerated = 1; + + if (cm_node->accept_pend) { + BUG_ON(!cm_node->listener); + atomic_dec(&cm_node->listener->pend_accepts_cnt); + cm_node->accept_pend = 0; + BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); + } + + was_timer_set = timer_pending(&cm_core->tcp_timer); + if (!was_timer_set) { + cm_core->tcp_timer.expires = jiffies + NES_SHORT_TIME; + add_timer(&cm_core->tcp_timer); + } + + return 0; +} + + +/** + * nes_addr_resolve_neigh + */ +static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpindex) +{ + struct rtable *rt; + struct neighbour *neigh; + int rc = arpindex; + struct net_device *netdev; + struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; + + rt = ip_route_output(&init_net, htonl(dst_ip), 0, 0, 0); + if (IS_ERR(rt)) { + printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n", + __func__, dst_ip); + return rc; + } + + if (netif_is_bond_slave(nesvnic->netdev)) + netdev = nesvnic->netdev->master; + else + netdev = nesvnic->netdev; + + neigh = dst_neigh_lookup(&rt->dst, &dst_ip); + + rcu_read_lock(); + if (neigh) { + if (neigh->nud_state & NUD_VALID) { + nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X" + " is %pM, Gateway is 0x%08X \n", dst_ip, + neigh->ha, ntohl(rt->rt_gateway)); + + if (arpindex >= 0) { + if (!memcmp(nesadapter->arp_table[arpindex].mac_addr, + neigh->ha, ETH_ALEN)) { + /* Mac address same as in nes_arp_table */ + goto out; + } + + nes_manage_arp_cache(nesvnic->netdev, + nesadapter->arp_table[arpindex].mac_addr, + dst_ip, NES_ARP_DELETE); + } + + nes_manage_arp_cache(nesvnic->netdev, neigh->ha, + dst_ip, NES_ARP_ADD); + rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL, + NES_ARP_RESOLVE); + } else { + neigh_event_send(neigh, NULL); + } + } +out: + rcu_read_unlock(); + + if (neigh) + neigh_release(neigh); + + ip_rt_put(rt); + return rc; +} + +/** + * make_cm_node - create a new instance of a cm node + */ +static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, + struct nes_vnic *nesvnic, struct nes_cm_info *cm_info, + struct nes_cm_listener *listener) +{ + struct nes_cm_node *cm_node; + struct timespec ts; + int oldarpindex = 0; + int arpindex = 0; + struct nes_device *nesdev; + struct nes_adapter *nesadapter; + + /* create an hte and cm_node for this instance */ + cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC); + if (!cm_node) + return NULL; + + /* set our node specific transport info */ + cm_node->loc_addr = cm_info->loc_addr; + cm_node->rem_addr = cm_info->rem_addr; + cm_node->loc_port = cm_info->loc_port; + cm_node->rem_port = cm_info->rem_port; + + cm_node->mpa_frame_rev = mpa_version; + cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; + cm_node->ird_size = IETF_NO_IRD_ORD; + cm_node->ord_size = IETF_NO_IRD_ORD; + + nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n", + &cm_node->loc_addr, cm_node->loc_port, + &cm_node->rem_addr, cm_node->rem_port); + cm_node->listener = listener; + cm_node->netdev = nesvnic->netdev; + cm_node->cm_id = cm_info->cm_id; + memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN); + + nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener, + cm_node->cm_id); + + spin_lock_init(&cm_node->retrans_list_lock); + + cm_node->loopbackpartner = NULL; + atomic_set(&cm_node->ref_count, 1); + /* associate our parent CM core */ + cm_node->cm_core = cm_core; + cm_node->tcp_cntxt.loc_id = NES_CM_DEF_LOCAL_ID; + cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; + cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >> + NES_CM_DEFAULT_RCV_WND_SCALE; + ts = current_kernel_time(); + cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec); + cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) - + sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN; + cm_node->tcp_cntxt.rcv_nxt = 0; + /* get a unique session ID , add thread_id to an upcounter to handle race */ + atomic_inc(&cm_core->node_cnt); + cm_node->conn_type = cm_info->conn_type; + cm_node->apbvt_set = 0; + cm_node->accept_pend = 0; + + cm_node->nesvnic = nesvnic; + /* get some device handles, for arp lookup */ + nesdev = nesvnic->nesdev; + nesadapter = nesdev->nesadapter; + + cm_node->loopbackpartner = NULL; + + /* get the mac addr for the remote node */ + if (ipv4_is_loopback(htonl(cm_node->rem_addr))) { + arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE); + } else { + oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE); + arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex); + } + if (arpindex < 0) { + kfree(cm_node); + return NULL; + } + + /* copy the mac addr to node context */ + memcpy(cm_node->rem_mac, nesadapter->arp_table[arpindex].mac_addr, ETH_ALEN); + nes_debug(NES_DBG_CM, "Remote mac addr from arp table: %pM\n", + cm_node->rem_mac); + + add_hte_node(cm_core, cm_node); + atomic_inc(&cm_nodes_created); + + return cm_node; +} + + +/** + * add_ref_cm_node - destroy an instance of a cm node + */ +static int add_ref_cm_node(struct nes_cm_node *cm_node) +{ + atomic_inc(&cm_node->ref_count); + return 0; +} + + +/** + * rem_ref_cm_node - destroy an instance of a cm node + */ +static int rem_ref_cm_node(struct nes_cm_core *cm_core, + struct nes_cm_node *cm_node) +{ + unsigned long flags; + struct nes_qp *nesqp; + + if (!cm_node) + return -EINVAL; + + spin_lock_irqsave(&cm_node->cm_core->ht_lock, flags); + if (atomic_dec_return(&cm_node->ref_count)) { + spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags); + return 0; + } + list_del(&cm_node->list); + atomic_dec(&cm_core->ht_node_cnt); + spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags); + + /* if the node is destroyed before connection was accelerated */ + if (!cm_node->accelerated && cm_node->accept_pend) { + BUG_ON(!cm_node->listener); + atomic_dec(&cm_node->listener->pend_accepts_cnt); + BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); + } + WARN_ON(cm_node->send_entry); + if (cm_node->recv_entry) + handle_recv_entry(cm_node, 0); + if (cm_node->listener) { + mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0); + } else { + if (cm_node->apbvt_set && cm_node->nesvnic) { + nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port, + PCI_FUNC( + cm_node->nesvnic->nesdev->pcidev->devfn), + NES_MANAGE_APBVT_DEL); + } + } + + atomic_dec(&cm_core->node_cnt); + atomic_inc(&cm_nodes_destroyed); + nesqp = cm_node->nesqp; + if (nesqp) { + nesqp->cm_node = NULL; + nes_rem_ref(&nesqp->ibqp); + cm_node->nesqp = NULL; + } + + kfree(cm_node); + return 0; +} + +/** + * process_options + */ +static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, + u32 optionsize, u32 syn_packet) +{ + u32 tmp; + u32 offset = 0; + union all_known_options *all_options; + char got_mss_option = 0; + + while (offset < optionsize) { + all_options = (union all_known_options *)(optionsloc + offset); + switch (all_options->as_base.optionnum) { + case OPTION_NUMBER_END: + offset = optionsize; + break; + case OPTION_NUMBER_NONE: + offset += 1; + continue; + case OPTION_NUMBER_MSS: + nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d " + "Size: %d\n", __func__, + all_options->as_mss.length, offset, optionsize); + got_mss_option = 1; + if (all_options->as_mss.length != 4) { + return 1; + } else { + tmp = ntohs(all_options->as_mss.mss); + if (tmp > 0 && tmp < + cm_node->tcp_cntxt.mss) + cm_node->tcp_cntxt.mss = tmp; + } + break; + case OPTION_NUMBER_WINDOW_SCALE: + cm_node->tcp_cntxt.snd_wscale = + all_options->as_windowscale.shiftcount; + break; + default: + nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n", + all_options->as_base.optionnum); + break; + } + offset += all_options->as_base.length; + } + if ((!got_mss_option) && (syn_packet)) + cm_node->tcp_cntxt.mss = NES_CM_DEFAULT_MSS; + return 0; +} + +static void drop_packet(struct sk_buff *skb) +{ + atomic_inc(&cm_accel_dropped_pkts); + dev_kfree_skb_any(skb); +} + +static void handle_fin_pkt(struct nes_cm_node *cm_node) +{ + nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. " + "refcnt=%d\n", cm_node, cm_node->state, + atomic_read(&cm_node->ref_count)); + switch (cm_node->state) { + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_MPAREJ_RCVD: + cm_node->tcp_cntxt.rcv_nxt++; + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_LAST_ACK; + send_fin(cm_node, NULL); + break; + case NES_CM_STATE_MPAREQ_SENT: + create_event(cm_node, NES_CM_EVENT_ABORTED); + cm_node->tcp_cntxt.rcv_nxt++; + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + add_ref_cm_node(cm_node); + send_reset(cm_node, NULL); + break; + case NES_CM_STATE_FIN_WAIT1: + cm_node->tcp_cntxt.rcv_nxt++; + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSING; + send_ack(cm_node, NULL); + /* Wait for ACK as this is simultaneous close.. + * After we receive ACK, do not send anything.. + * Just rm the node.. Done.. */ + break; + case NES_CM_STATE_FIN_WAIT2: + cm_node->tcp_cntxt.rcv_nxt++; + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_TIME_WAIT; + send_ack(cm_node, NULL); + schedule_nes_timer(cm_node, NULL, NES_TIMER_TYPE_CLOSE, 1, 0); + break; + case NES_CM_STATE_TIME_WAIT: + cm_node->tcp_cntxt.rcv_nxt++; + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + rem_ref_cm_node(cm_node->cm_core, cm_node); + break; + case NES_CM_STATE_TSA: + default: + nes_debug(NES_DBG_CM, "Error Rcvd FIN for node-%p state = %d\n", + cm_node, cm_node->state); + break; + } +} + + +static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + + int reset = 0; /* whether to send reset in case of err.. */ + atomic_inc(&cm_resets_recvd); + nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u." + " refcnt=%d\n", cm_node, cm_node->state, + atomic_read(&cm_node->ref_count)); + cleanup_retrans_entry(cm_node); + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_MPAREQ_SENT: + nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); + switch (cm_node->mpa_frame_rev) { + case IETF_MPA_V2: + cm_node->mpa_frame_rev = IETF_MPA_V1; + /* send a syn and goto syn sent state */ + cm_node->state = NES_CM_STATE_SYN_SENT; + if (send_syn(cm_node, 0, NULL)) { + active_open_err(cm_node, skb, reset); + } + break; + case IETF_MPA_V1: + default: + active_open_err(cm_node, skb, reset); + break; + } + break; + case NES_CM_STATE_MPAREQ_RCVD: + atomic_inc(&cm_node->passive_state); + dev_kfree_skb_any(skb); + break; + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_LISTENING: + nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__); + passive_open_err(cm_node, skb, reset); + break; + case NES_CM_STATE_TSA: + active_open_err(cm_node, skb, reset); + break; + case NES_CM_STATE_CLOSED: + drop_packet(skb); + break; + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_LAST_ACK: + cm_node->cm_id->rem_ref(cm_node->cm_id); + case NES_CM_STATE_TIME_WAIT: + cm_node->state = NES_CM_STATE_CLOSED; + rem_ref_cm_node(cm_node->cm_core, cm_node); + drop_packet(skb); + break; + default: + drop_packet(skb); + break; + } +} + + +static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb) +{ + int ret = 0; + int datasize = skb->len; + u8 *dataloc = skb->data; + + enum nes_cm_event_type type = NES_CM_EVENT_UNKNOWN; + u32 res_type; + + ret = parse_mpa(cm_node, dataloc, &res_type, datasize); + if (ret) { + nes_debug(NES_DBG_CM, "didn't like MPA Request\n"); + if (cm_node->state == NES_CM_STATE_MPAREQ_SENT) { + nes_debug(NES_DBG_CM, "%s[%u] create abort for " + "cm_node=%p listener=%p state=%d\n", __func__, + __LINE__, cm_node, cm_node->listener, + cm_node->state); + active_open_err(cm_node, skb, 1); + } else { + passive_open_err(cm_node, skb, 1); + } + return; + } + + switch (cm_node->state) { + case NES_CM_STATE_ESTABLISHED: + if (res_type == NES_MPA_REQUEST_REJECT) + /*BIG problem as we are receiving the MPA.. So should + * not be REJECT.. This is Passive Open.. We can + * only receive it Reject for Active Open...*/ + WARN_ON(1); + cm_node->state = NES_CM_STATE_MPAREQ_RCVD; + type = NES_CM_EVENT_MPA_REQ; + atomic_set(&cm_node->passive_state, + NES_PASSIVE_STATE_INDICATED); + break; + case NES_CM_STATE_MPAREQ_SENT: + cleanup_retrans_entry(cm_node); + if (res_type == NES_MPA_REQUEST_REJECT) { + type = NES_CM_EVENT_MPA_REJECT; + cm_node->state = NES_CM_STATE_MPAREJ_RCVD; + } else { + type = NES_CM_EVENT_CONNECTED; + cm_node->state = NES_CM_STATE_TSA; + } + + break; + default: + WARN_ON(1); + break; + } + dev_kfree_skb_any(skb); + create_event(cm_node, type); +} + +static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb) +{ + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_MPAREQ_SENT: + nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); + active_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_SYN_RCVD: + passive_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_TSA: + default: + drop_packet(skb); + } +} + +static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb) +{ + int err; + + err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num)) ? 0 : 1; + if (err) + active_open_err(cm_node, skb, 1); + + return err; +} + +static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb) +{ + int err = 0; + u32 seq; + u32 ack_seq; + u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num; + u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt; + u32 rcv_wnd; + + seq = ntohl(tcph->seq); + ack_seq = ntohl(tcph->ack_seq); + rcv_wnd = cm_node->tcp_cntxt.rcv_wnd; + if (ack_seq != loc_seq_num) + err = 1; + else if (!between(seq, rcv_nxt, (rcv_nxt + rcv_wnd))) + err = 1; + if (err) { + nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); + indicate_pkt_err(cm_node, skb); + nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X " + "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt, + rcv_wnd); + } + return err; +} + +/* + * handle_syn_pkt() is for Passive node. The syn packet is received when a node + * is created with a listener or it may comein as rexmitted packet which in + * that case will be just dropped. + */ +static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + int ret; + u32 inc_sequence; + int optionsize; + + optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); + skb_trim(skb, 0); + inc_sequence = ntohl(tcph->seq); + + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_MPAREQ_SENT: + /* Rcvd syn on active open connection*/ + active_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_LISTENING: + /* Passive OPEN */ + if (atomic_read(&cm_node->listener->pend_accepts_cnt) > + cm_node->listener->backlog) { + nes_debug(NES_DBG_CM, "drop syn due to backlog " + "pressure \n"); + cm_backlog_drops++; + passive_open_err(cm_node, skb, 0); + break; + } + ret = handle_tcp_options(cm_node, tcph, skb, optionsize, + 1); + if (ret) { + passive_open_err(cm_node, skb, 0); + /* drop pkt */ + break; + } + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; + BUG_ON(cm_node->send_entry); + cm_node->accept_pend = 1; + atomic_inc(&cm_node->listener->pend_accepts_cnt); + + cm_node->state = NES_CM_STATE_SYN_RCVD; + send_syn(cm_node, 1, skb); + break; + case NES_CM_STATE_CLOSED: + cleanup_retrans_entry(cm_node); + add_ref_cm_node(cm_node); + send_reset(cm_node, skb); + break; + case NES_CM_STATE_TSA: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_MPAREQ_RCVD: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_CLOSING: + case NES_CM_STATE_UNKNOWN: + default: + drop_packet(skb); + break; + } +} + +static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + int ret; + u32 inc_sequence; + int optionsize; + + optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); + skb_trim(skb, 0); + inc_sequence = ntohl(tcph->seq); + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + cleanup_retrans_entry(cm_node); + /* active open */ + if (check_syn(cm_node, tcph, skb)) + return; + cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + /* setup options */ + ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0); + if (ret) { + nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n", + cm_node); + break; + } + cleanup_retrans_entry(cm_node); + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; + send_mpa_request(cm_node, skb); + cm_node->state = NES_CM_STATE_MPAREQ_SENT; + break; + case NES_CM_STATE_MPAREQ_RCVD: + /* passive open, so should not be here */ + passive_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_LISTENING: + cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + send_reset(cm_node, skb); + break; + case NES_CM_STATE_CLOSED: + cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); + cleanup_retrans_entry(cm_node); + add_ref_cm_node(cm_node); + send_reset(cm_node, skb); + break; + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_TSA: + case NES_CM_STATE_CLOSING: + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_MPAREQ_SENT: + default: + drop_packet(skb); + break; + } +} + +static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + int datasize = 0; + u32 inc_sequence; + int ret = 0; + int optionsize; + + optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); + + if (check_seq(cm_node, tcph, skb)) + return -EINVAL; + + skb_pull(skb, tcph->doff << 2); + inc_sequence = ntohl(tcph->seq); + datasize = skb->len; + switch (cm_node->state) { + case NES_CM_STATE_SYN_RCVD: + /* Passive OPEN */ + cleanup_retrans_entry(cm_node); + ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1); + if (ret) + break; + cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + cm_node->state = NES_CM_STATE_ESTABLISHED; + if (datasize) { + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + handle_rcv_mpa(cm_node, skb); + } else { /* rcvd ACK only */ + dev_kfree_skb_any(skb); + } + break; + case NES_CM_STATE_ESTABLISHED: + /* Passive OPEN */ + cleanup_retrans_entry(cm_node); + if (datasize) { + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + handle_rcv_mpa(cm_node, skb); + } else { + drop_packet(skb); + } + break; + case NES_CM_STATE_MPAREQ_SENT: + cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + if (datasize) { + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + handle_rcv_mpa(cm_node, skb); + } else { /* Could be just an ack pkt.. */ + dev_kfree_skb_any(skb); + } + break; + case NES_CM_STATE_LISTENING: + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + send_reset(cm_node, skb); + break; + case NES_CM_STATE_CLOSED: + cleanup_retrans_entry(cm_node); + add_ref_cm_node(cm_node); + send_reset(cm_node, skb); + break; + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_CLOSING: + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + cm_node->cm_id->rem_ref(cm_node->cm_id); + rem_ref_cm_node(cm_node->cm_core, cm_node); + drop_packet(skb); + break; + case NES_CM_STATE_FIN_WAIT1: + cleanup_retrans_entry(cm_node); + drop_packet(skb); + cm_node->state = NES_CM_STATE_FIN_WAIT2; + break; + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_TSA: + case NES_CM_STATE_MPAREQ_RCVD: + case NES_CM_STATE_UNKNOWN: + default: + cleanup_retrans_entry(cm_node); + drop_packet(skb); + break; + } + return ret; +} + + + +static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb, int optionsize, int passive) +{ + u8 *optionsloc = (u8 *)&tcph[1]; + + if (optionsize) { + if (process_options(cm_node, optionsloc, optionsize, + (u32)tcph->syn)) { + nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", + __func__, cm_node); + if (passive) + passive_open_err(cm_node, skb, 1); + else + active_open_err(cm_node, skb, 1); + return 1; + } + } + + cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) << + cm_node->tcp_cntxt.snd_wscale; + + if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) + cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd; + return 0; +} + +/* + * active_open_err() will send reset() if flag set.. + * It will also send ABORT event. + */ +static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, + int reset) +{ + cleanup_retrans_entry(cm_node); + if (reset) { + nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, " + "state=%d\n", cm_node, cm_node->state); + add_ref_cm_node(cm_node); + send_reset(cm_node, skb); + } else { + dev_kfree_skb_any(skb); + } + + cm_node->state = NES_CM_STATE_CLOSED; + create_event(cm_node, NES_CM_EVENT_ABORTED); +} + +/* + * passive_open_err() will either do a reset() or will free up the skb and + * remove the cm_node. + */ +static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, + int reset) +{ + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + if (reset) { + nes_debug(NES_DBG_CM, "passive_open_err sending RST for " + "cm_node=%p state =%d\n", cm_node, cm_node->state); + send_reset(cm_node, skb); + } else { + dev_kfree_skb_any(skb); + rem_ref_cm_node(cm_node->cm_core, cm_node); + } +} + +/* + * free_retrans_entry() routines assumes that the retrans_list_lock has + * been acquired before calling. + */ +static void free_retrans_entry(struct nes_cm_node *cm_node) +{ + struct nes_timer_entry *send_entry; + + send_entry = cm_node->send_entry; + if (send_entry) { + cm_node->send_entry = NULL; + dev_kfree_skb_any(send_entry->skb); + kfree(send_entry); + rem_ref_cm_node(cm_node->cm_core, cm_node); + } +} + +static void cleanup_retrans_entry(struct nes_cm_node *cm_node) +{ + unsigned long flags; + + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + free_retrans_entry(cm_node); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); +} + +/** + * process_packet + * Returns skb if to be freed, else it will return NULL if already used.. + */ +static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct nes_cm_core *cm_core) +{ + enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN; + struct tcphdr *tcph = tcp_hdr(skb); + u32 fin_set = 0; + int ret = 0; + + skb_pull(skb, ip_hdr(skb)->ihl << 2); + + nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d " + "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn, + tcph->ack, tcph->rst, tcph->fin); + + if (tcph->rst) { + pkt_type = NES_PKT_TYPE_RST; + } else if (tcph->syn) { + pkt_type = NES_PKT_TYPE_SYN; + if (tcph->ack) + pkt_type = NES_PKT_TYPE_SYNACK; + } else if (tcph->ack) { + pkt_type = NES_PKT_TYPE_ACK; + } + if (tcph->fin) + fin_set = 1; + + switch (pkt_type) { + case NES_PKT_TYPE_SYN: + handle_syn_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_SYNACK: + handle_synack_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_ACK: + ret = handle_ack_pkt(cm_node, skb, tcph); + if (fin_set && !ret) + handle_fin_pkt(cm_node); + break; + case NES_PKT_TYPE_RST: + handle_rst_pkt(cm_node, skb, tcph); + break; + default: + if ((fin_set) && (!check_seq(cm_node, tcph, skb))) + handle_fin_pkt(cm_node); + drop_packet(skb); + break; + } +} + +/** + * mini_cm_listen - create a listen node with params + */ +static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, + struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) +{ + struct nes_cm_listener *listener; + unsigned long flags; + + nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n", + cm_info->loc_addr, cm_info->loc_port); + + /* cannot have multiple matching listeners */ + listener = find_listener(cm_core, htonl(cm_info->loc_addr), + htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE); + if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) { + /* find automatically incs ref count ??? */ + atomic_dec(&listener->ref_count); + nes_debug(NES_DBG_CM, "Not creating listener since it already exists\n"); + return NULL; + } + + if (!listener) { + /* create a CM listen node (1/2 node to compare incoming traffic to) */ + listener = kzalloc(sizeof(*listener), GFP_ATOMIC); + if (!listener) { + nes_debug(NES_DBG_CM, "Not creating listener memory allocation failed\n"); + return NULL; + } + + listener->loc_addr = htonl(cm_info->loc_addr); + listener->loc_port = htons(cm_info->loc_port); + listener->reused_node = 0; + + atomic_set(&listener->ref_count, 1); + } + /* pasive case */ + /* find already inc'ed the ref count */ + else { + listener->reused_node = 1; + } + + listener->cm_id = cm_info->cm_id; + atomic_set(&listener->pend_accepts_cnt, 0); + listener->cm_core = cm_core; + listener->nesvnic = nesvnic; + atomic_inc(&cm_core->node_cnt); + + listener->conn_type = cm_info->conn_type; + listener->backlog = cm_info->backlog; + listener->listener_state = NES_CM_LISTENER_ACTIVE_STATE; + + if (!listener->reused_node) { + spin_lock_irqsave(&cm_core->listen_list_lock, flags); + list_add(&listener->list, &cm_core->listen_list.list); + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + atomic_inc(&cm_core->listen_node_cnt); + } + + nes_debug(NES_DBG_CM, "Api - listen(): addr=0x%08X, port=0x%04x," + " listener = %p, backlog = %d, cm_id = %p.\n", + cm_info->loc_addr, cm_info->loc_port, + listener, listener->backlog, listener->cm_id); + + return listener; +} + + +/** + * mini_cm_connect - make a connection node with params + */ +static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, + struct nes_vnic *nesvnic, u16 private_data_len, + void *private_data, struct nes_cm_info *cm_info) +{ + int ret = 0; + struct nes_cm_node *cm_node; + struct nes_cm_listener *loopbackremotelistener; + struct nes_cm_node *loopbackremotenode; + struct nes_cm_info loopback_cm_info; + u8 *start_buff; + + /* create a CM connection node */ + cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL); + if (!cm_node) + return NULL; + + /* set our node side to client (active) side */ + cm_node->tcp_cntxt.client = 1; + cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; + + if (cm_info->loc_addr == cm_info->rem_addr) { + loopbackremotelistener = find_listener(cm_core, + ntohl(nesvnic->local_ipaddr), cm_node->rem_port, + NES_CM_LISTENER_ACTIVE_STATE); + if (loopbackremotelistener == NULL) { + create_event(cm_node, NES_CM_EVENT_ABORTED); + } else { + loopback_cm_info = *cm_info; + loopback_cm_info.loc_port = cm_info->rem_port; + loopback_cm_info.rem_port = cm_info->loc_port; + loopback_cm_info.cm_id = loopbackremotelistener->cm_id; + loopbackremotenode = make_cm_node(cm_core, nesvnic, + &loopback_cm_info, loopbackremotelistener); + if (!loopbackremotenode) { + rem_ref_cm_node(cm_node->cm_core, cm_node); + return NULL; + } + atomic_inc(&cm_loopbacks); + loopbackremotenode->loopbackpartner = cm_node; + loopbackremotenode->tcp_cntxt.rcv_wscale = + NES_CM_DEFAULT_RCV_WND_SCALE; + cm_node->loopbackpartner = loopbackremotenode; + memcpy(loopbackremotenode->mpa_frame_buf, private_data, + private_data_len); + loopbackremotenode->mpa_frame_size = private_data_len; + + /* we are done handling this state. */ + /* set node to a TSA state */ + cm_node->state = NES_CM_STATE_TSA; + cm_node->tcp_cntxt.rcv_nxt = + loopbackremotenode->tcp_cntxt.loc_seq_num; + loopbackremotenode->tcp_cntxt.rcv_nxt = + cm_node->tcp_cntxt.loc_seq_num; + cm_node->tcp_cntxt.max_snd_wnd = + loopbackremotenode->tcp_cntxt.rcv_wnd; + loopbackremotenode->tcp_cntxt.max_snd_wnd = + cm_node->tcp_cntxt.rcv_wnd; + cm_node->tcp_cntxt.snd_wnd = + loopbackremotenode->tcp_cntxt.rcv_wnd; + loopbackremotenode->tcp_cntxt.snd_wnd = + cm_node->tcp_cntxt.rcv_wnd; + cm_node->tcp_cntxt.snd_wscale = + loopbackremotenode->tcp_cntxt.rcv_wscale; + loopbackremotenode->tcp_cntxt.snd_wscale = + cm_node->tcp_cntxt.rcv_wscale; + loopbackremotenode->state = NES_CM_STATE_MPAREQ_RCVD; + create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ); + } + return cm_node; + } + + start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2); + cm_node->mpa_frame_size = private_data_len; + + memcpy(start_buff, private_data, private_data_len); + + /* send a syn and goto syn sent state */ + cm_node->state = NES_CM_STATE_SYN_SENT; + ret = send_syn(cm_node, 0, NULL); + + if (ret) { + /* error in sending the syn free up the cm_node struct */ + nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest " + "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n", + cm_node->rem_addr, cm_node->rem_port, cm_node, + cm_node->cm_id); + rem_ref_cm_node(cm_node->cm_core, cm_node); + cm_node = NULL; + } + + if (cm_node) { + nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X," + "port=0x%04x, cm_node=%p, cm_id = %p.\n", + cm_node->rem_addr, cm_node->rem_port, cm_node, + cm_node->cm_id); + } + + return cm_node; +} + + +/** + * mini_cm_accept - accept a connection + * This function is never called + */ +static int mini_cm_accept(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) +{ + return 0; +} + + +/** + * mini_cm_reject - reject and teardown a connection + */ +static int mini_cm_reject(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) +{ + int ret = 0; + int err = 0; + int passive_state; + struct nes_cm_event event; + struct iw_cm_id *cm_id = cm_node->cm_id; + struct nes_cm_node *loopback = cm_node->loopbackpartner; + + nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n", + __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state); + + if (cm_node->tcp_cntxt.client) + return ret; + cleanup_retrans_entry(cm_node); + + if (!loopback) { + passive_state = atomic_add_return(1, &cm_node->passive_state); + if (passive_state == NES_SEND_RESET_EVENT) { + cm_node->state = NES_CM_STATE_CLOSED; + rem_ref_cm_node(cm_core, cm_node); + } else { + if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) { + rem_ref_cm_node(cm_core, cm_node); + } else { + ret = send_mpa_reject(cm_node); + if (ret) { + cm_node->state = NES_CM_STATE_CLOSED; + err = send_reset(cm_node, NULL); + if (err) + WARN_ON(1); + } else { + cm_id->add_ref(cm_id); + } + } + } + } else { + cm_node->cm_id = NULL; + if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) { + rem_ref_cm_node(cm_core, cm_node); + rem_ref_cm_node(cm_core, loopback); + } else { + event.cm_node = loopback; + event.cm_info.rem_addr = loopback->rem_addr; + event.cm_info.loc_addr = loopback->loc_addr; + event.cm_info.rem_port = loopback->rem_port; + event.cm_info.loc_port = loopback->loc_port; + event.cm_info.cm_id = loopback->cm_id; + cm_event_mpa_reject(&event); + rem_ref_cm_node(cm_core, cm_node); + loopback->state = NES_CM_STATE_CLOSING; + + cm_id = loopback->cm_id; + rem_ref_cm_node(cm_core, loopback); + cm_id->rem_ref(cm_id); + } + } + + return ret; +} + + +/** + * mini_cm_close + */ +static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) +{ + int ret = 0; + + if (!cm_core || !cm_node) + return -EINVAL; + + switch (cm_node->state) { + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_ONE_SIDE_ESTABLISHED: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_ACCEPTING: + case NES_CM_STATE_MPAREQ_SENT: + case NES_CM_STATE_MPAREQ_RCVD: + cleanup_retrans_entry(cm_node); + send_reset(cm_node, NULL); + break; + case NES_CM_STATE_CLOSE_WAIT: + cm_node->state = NES_CM_STATE_LAST_ACK; + send_fin(cm_node, NULL); + break; + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_TIME_WAIT: + case NES_CM_STATE_CLOSING: + ret = -1; + break; + case NES_CM_STATE_LISTENING: + cleanup_retrans_entry(cm_node); + send_reset(cm_node, NULL); + break; + case NES_CM_STATE_MPAREJ_RCVD: + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_INITED: + case NES_CM_STATE_CLOSED: + case NES_CM_STATE_LISTENER_DESTROYED: + ret = rem_ref_cm_node(cm_core, cm_node); + break; + case NES_CM_STATE_TSA: + if (cm_node->send_entry) + printk(KERN_ERR "ERROR Close got called from STATE_TSA " + "send_entry=%p\n", cm_node->send_entry); + ret = rem_ref_cm_node(cm_core, cm_node); + break; + } + return ret; +} + + +/** + * recv_pkt - recv an ETHERNET packet, and process it through CM + * node state machine + */ +static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, + struct nes_vnic *nesvnic, struct sk_buff *skb) +{ + struct nes_cm_node *cm_node = NULL; + struct nes_cm_listener *listener = NULL; + struct iphdr *iph; + struct tcphdr *tcph; + struct nes_cm_info nfo; + int skb_handled = 1; + __be32 tmp_daddr, tmp_saddr; + + if (!skb) + return 0; + if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) + return 0; + + iph = (struct iphdr *)skb->data; + tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr)); + + nfo.loc_addr = ntohl(iph->daddr); + nfo.loc_port = ntohs(tcph->dest); + nfo.rem_addr = ntohl(iph->saddr); + nfo.rem_port = ntohs(tcph->source); + + tmp_daddr = cpu_to_be32(iph->daddr); + tmp_saddr = cpu_to_be32(iph->saddr); + + nes_debug(NES_DBG_CM, "Received packet: dest=%pI4:0x%04X src=%pI4:0x%04X\n", + &tmp_daddr, tcph->dest, &tmp_saddr, tcph->source); + + do { + cm_node = find_node(cm_core, + nfo.rem_port, nfo.rem_addr, + nfo.loc_port, nfo.loc_addr); + + if (!cm_node) { + /* Only type of packet accepted are for */ + /* the PASSIVE open (syn only) */ + if ((!tcph->syn) || (tcph->ack)) { + skb_handled = 0; + break; + } + listener = find_listener(cm_core, nfo.loc_addr, + nfo.loc_port, + NES_CM_LISTENER_ACTIVE_STATE); + if (!listener) { + nfo.cm_id = NULL; + nfo.conn_type = 0; + nes_debug(NES_DBG_CM, "Unable to find listener for the pkt\n"); + skb_handled = 0; + break; + } + nfo.cm_id = listener->cm_id; + nfo.conn_type = listener->conn_type; + cm_node = make_cm_node(cm_core, nesvnic, &nfo, + listener); + if (!cm_node) { + nes_debug(NES_DBG_CM, "Unable to allocate " + "node\n"); + cm_packets_dropped++; + atomic_dec(&listener->ref_count); + dev_kfree_skb_any(skb); + break; + } + if (!tcph->rst && !tcph->fin) { + cm_node->state = NES_CM_STATE_LISTENING; + } else { + cm_packets_dropped++; + rem_ref_cm_node(cm_core, cm_node); + dev_kfree_skb_any(skb); + break; + } + add_ref_cm_node(cm_node); + } else if (cm_node->state == NES_CM_STATE_TSA) { + if (cm_node->nesqp->pau_mode) + nes_queue_mgt_skbs(skb, nesvnic, cm_node->nesqp); + else { + rem_ref_cm_node(cm_core, cm_node); + atomic_inc(&cm_accel_dropped_pkts); + dev_kfree_skb_any(skb); + } + break; + } + skb_reset_network_header(skb); + skb_set_transport_header(skb, sizeof(*tcph)); + skb->len = ntohs(iph->tot_len); + process_packet(cm_node, skb, cm_core); + rem_ref_cm_node(cm_core, cm_node); + } while (0); + return skb_handled; +} + + +/** + * nes_cm_alloc_core - allocate a top level instance of a cm core + */ +static struct nes_cm_core *nes_cm_alloc_core(void) +{ + struct nes_cm_core *cm_core; + + /* setup the CM core */ + /* alloc top level core control structure */ + cm_core = kzalloc(sizeof(*cm_core), GFP_KERNEL); + if (!cm_core) + return NULL; + + INIT_LIST_HEAD(&cm_core->connected_nodes); + init_timer(&cm_core->tcp_timer); + cm_core->tcp_timer.function = nes_cm_timer_tick; + + cm_core->mtu = NES_CM_DEFAULT_MTU; + cm_core->state = NES_CM_STATE_INITED; + cm_core->free_tx_pkt_max = NES_CM_DEFAULT_FREE_PKTS; + + atomic_set(&cm_core->events_posted, 0); + + cm_core->api = &nes_cm_api; + + spin_lock_init(&cm_core->ht_lock); + spin_lock_init(&cm_core->listen_list_lock); + + INIT_LIST_HEAD(&cm_core->listen_list.list); + + nes_debug(NES_DBG_CM, "Init CM Core completed -- cm_core=%p\n", cm_core); + + nes_debug(NES_DBG_CM, "Enable QUEUE EVENTS\n"); + cm_core->event_wq = create_singlethread_workqueue("nesewq"); + cm_core->post_event = nes_cm_post_event; + nes_debug(NES_DBG_CM, "Enable QUEUE DISCONNECTS\n"); + cm_core->disconn_wq = create_singlethread_workqueue("nesdwq"); + + print_core(cm_core); + return cm_core; +} + + +/** + * mini_cm_dealloc_core - deallocate a top level instance of a cm core + */ +static int mini_cm_dealloc_core(struct nes_cm_core *cm_core) +{ + nes_debug(NES_DBG_CM, "De-Alloc CM Core (%p)\n", cm_core); + + if (!cm_core) + return -EINVAL; + + barrier(); + + if (timer_pending(&cm_core->tcp_timer)) + del_timer(&cm_core->tcp_timer); + + destroy_workqueue(cm_core->event_wq); + destroy_workqueue(cm_core->disconn_wq); + nes_debug(NES_DBG_CM, "\n"); + kfree(cm_core); + + return 0; +} + + +/** + * mini_cm_get + */ +static int mini_cm_get(struct nes_cm_core *cm_core) +{ + return cm_core->state; +} + + +/** + * mini_cm_set + */ +static int mini_cm_set(struct nes_cm_core *cm_core, u32 type, u32 value) +{ + int ret = 0; + + switch (type) { + case NES_CM_SET_PKT_SIZE: + cm_core->mtu = value; + break; + case NES_CM_SET_FREE_PKT_Q_SIZE: + cm_core->free_tx_pkt_max = value; + break; + default: + /* unknown set option */ + ret = -EINVAL; + } + + return ret; +} + + +/** + * nes_cm_init_tsa_conn setup HW; MPA frames must be + * successfully exchanged when this is called + */ +static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_node) +{ + int ret = 0; + + if (!nesqp) + return -EINVAL; + + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_IPV4 | + NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG | + NES_QPCONTEXT_MISC_DROS); + + if (cm_node->tcp_cntxt.snd_wscale || cm_node->tcp_cntxt.rcv_wscale) + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WSCALE); + + nesqp->nesqp_context->misc2 |= cpu_to_le32(64 << NES_QPCONTEXT_MISC2_TTL_SHIFT); + + nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16); + + nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32( + (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT); + + nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32( + (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) & + NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK); + + nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32( + (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) & + NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK); + + nesqp->nesqp_context->keepalive = cpu_to_le32(0x80); + nesqp->nesqp_context->ts_recent = 0; + nesqp->nesqp_context->ts_age = 0; + nesqp->nesqp_context->snd_nxt = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); + nesqp->nesqp_context->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd); + nesqp->nesqp_context->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt); + nesqp->nesqp_context->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd << + cm_node->tcp_cntxt.rcv_wscale); + nesqp->nesqp_context->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); + nesqp->nesqp_context->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); + nesqp->nesqp_context->srtt = 0; + nesqp->nesqp_context->rttvar = cpu_to_le32(0x6); + nesqp->nesqp_context->ssthresh = cpu_to_le32(0x3FFFC000); + nesqp->nesqp_context->cwnd = cpu_to_le32(2 * cm_node->tcp_cntxt.mss); + nesqp->nesqp_context->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt); + nesqp->nesqp_context->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); + nesqp->nesqp_context->max_snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd); + + nes_debug(NES_DBG_CM, "QP%u: rcv_nxt = 0x%08X, snd_nxt = 0x%08X," + " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n", + nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt), + le32_to_cpu(nesqp->nesqp_context->snd_nxt), + cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale), + le32_to_cpu(nesqp->nesqp_context->rcv_wnd), + le32_to_cpu(nesqp->nesqp_context->misc)); + nes_debug(NES_DBG_CM, " snd_wnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->snd_wnd)); + nes_debug(NES_DBG_CM, " snd_cwnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->cwnd)); + nes_debug(NES_DBG_CM, " max_swnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->max_snd_wnd)); + + nes_debug(NES_DBG_CM, "Change cm_node state to TSA\n"); + cm_node->state = NES_CM_STATE_TSA; + + return ret; +} + + +/** + * nes_cm_disconn + */ +int nes_cm_disconn(struct nes_qp *nesqp) +{ + struct disconn_work *work; + + work = kzalloc(sizeof *work, GFP_ATOMIC); + if (!work) + return -ENOMEM; /* Timer will clean up */ + + nes_add_ref(&nesqp->ibqp); + work->nesqp = nesqp; + INIT_WORK(&work->work, nes_disconnect_worker); + queue_work(g_cm_core->disconn_wq, &work->work); + return 0; +} + + +/** + * nes_disconnect_worker + */ +static void nes_disconnect_worker(struct work_struct *work) +{ + struct disconn_work *dwork = container_of(work, struct disconn_work, work); + struct nes_qp *nesqp = dwork->nesqp; + + kfree(dwork); + nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n", + nesqp->last_aeq, nesqp->hwqp.qp_id); + nes_cm_disconn_true(nesqp); + nes_rem_ref(&nesqp->ibqp); +} + + +/** + * nes_cm_disconn_true + */ +static int nes_cm_disconn_true(struct nes_qp *nesqp) +{ + unsigned long flags; + int ret = 0; + struct iw_cm_id *cm_id; + struct iw_cm_event cm_event; + struct nes_vnic *nesvnic; + u16 last_ae; + u8 original_hw_tcp_state; + u8 original_ibqp_state; + int disconn_status = 0; + int issue_disconn = 0; + int issue_close = 0; + int issue_flush = 0; + u32 flush_q = NES_CQP_FLUSH_RQ; + struct ib_event ibevent; + + if (!nesqp) { + nes_debug(NES_DBG_CM, "disconnect_worker nesqp is NULL\n"); + return -1; + } + + spin_lock_irqsave(&nesqp->lock, flags); + cm_id = nesqp->cm_id; + /* make sure we havent already closed this connection */ + if (!cm_id) { + nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n", + nesqp->hwqp.qp_id); + spin_unlock_irqrestore(&nesqp->lock, flags); + return -1; + } + + nesvnic = to_nesvnic(nesqp->ibqp.device); + nes_debug(NES_DBG_CM, "Disconnecting QP%u\n", nesqp->hwqp.qp_id); + + original_hw_tcp_state = nesqp->hw_tcp_state; + original_ibqp_state = nesqp->ibqp_state; + last_ae = nesqp->last_aeq; + + if (nesqp->term_flags) { + issue_disconn = 1; + issue_close = 1; + nesqp->cm_id = NULL; + del_timer(&nesqp->terminate_timer); + if (nesqp->flush_issued == 0) { + nesqp->flush_issued = 1; + issue_flush = 1; + } + } else if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || + ((original_ibqp_state == IB_QPS_RTS) && + (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { + issue_disconn = 1; + if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) + disconn_status = -ECONNRESET; + } + + if (((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) || + (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) || + (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) || + (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { + issue_close = 1; + nesqp->cm_id = NULL; + if (nesqp->flush_issued == 0) { + nesqp->flush_issued = 1; + issue_flush = 1; + } + } + + spin_unlock_irqrestore(&nesqp->lock, flags); + + if ((issue_flush) && (nesqp->destroyed == 0)) { + /* Flush the queue(s) */ + if (nesqp->hw_iwarp_state >= NES_AEQE_IWARP_STATE_TERMINATE) + flush_q |= NES_CQP_FLUSH_SQ; + flush_wqes(nesvnic->nesdev, nesqp, flush_q, 1); + + if (nesqp->term_flags) { + ibevent.device = nesqp->ibqp.device; + ibevent.event = nesqp->terminate_eventtype; + ibevent.element.qp = &nesqp->ibqp; + nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); + } + } + + if ((cm_id) && (cm_id->event_handler)) { + if (issue_disconn) { + atomic_inc(&cm_disconnects); + cm_event.event = IW_CM_EVENT_DISCONNECT; + cm_event.status = disconn_status; + cm_event.local_addr = cm_id->local_addr; + cm_event.remote_addr = cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + + nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event" + " for QP%u, SQ Head = %u, SQ Tail = %u. " + "cm_id = %p, refcount = %u.\n", + nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, + nesqp->hwqp.sq_tail, cm_id, + atomic_read(&nesqp->refcount)); + + ret = cm_id->event_handler(cm_id, &cm_event); + if (ret) + nes_debug(NES_DBG_CM, "OFA CM event_handler " + "returned, ret=%d\n", ret); + } + + if (issue_close) { + atomic_inc(&cm_closes); + nes_disconnect(nesqp, 1); + + cm_id->provider_data = nesqp; + /* Send up the close complete event */ + cm_event.event = IW_CM_EVENT_CLOSE; + cm_event.status = 0; + cm_event.provider_data = cm_id->provider_data; + cm_event.local_addr = cm_id->local_addr; + cm_event.remote_addr = cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + + ret = cm_id->event_handler(cm_id, &cm_event); + if (ret) + nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); + + cm_id->rem_ref(cm_id); + } + } + + return 0; +} + + +/** + * nes_disconnect + */ +static int nes_disconnect(struct nes_qp *nesqp, int abrupt) +{ + int ret = 0; + struct nes_vnic *nesvnic; + struct nes_device *nesdev; + struct nes_ib_device *nesibdev; + + nesvnic = to_nesvnic(nesqp->ibqp.device); + if (!nesvnic) + return -EINVAL; + + nesdev = nesvnic->nesdev; + nesibdev = nesvnic->nesibdev; + + nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", + netdev_refcnt_read(nesvnic->netdev)); + + if (nesqp->active_conn) { + + /* indicate this connection is NOT active */ + nesqp->active_conn = 0; + } else { + /* Need to free the Last Streaming Mode Message */ + if (nesqp->ietf_frame) { + if (nesqp->lsmm_mr) + nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr); + pci_free_consistent(nesdev->pcidev, + nesqp->private_data_len + nesqp->ietf_frame_size, + nesqp->ietf_frame, nesqp->ietf_frame_pbase); + } + } + + /* close the CM node down if it is still active */ + if (nesqp->cm_node) { + nes_debug(NES_DBG_CM, "Call close API\n"); + + g_cm_core->api->close(g_cm_core, nesqp->cm_node); + } + + return ret; +} + + +/** + * nes_accept + */ +int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + u64 u64temp; + struct ib_qp *ibqp; + struct nes_qp *nesqp; + struct nes_vnic *nesvnic; + struct nes_device *nesdev; + struct nes_cm_node *cm_node; + struct nes_adapter *adapter; + struct ib_qp_attr attr; + struct iw_cm_event cm_event; + struct nes_hw_qp_wqe *wqe; + struct nes_v4_quad nes_quad; + u32 crc_value; + int ret; + int passive_state; + struct nes_ib_device *nesibdev; + struct ib_mr *ibmr = NULL; + struct ib_phys_buf ibphysbuf; + struct nes_pd *nespd; + u64 tagged_offset; + u8 mpa_frame_offset = 0; + struct ietf_mpa_v2 *mpa_v2_frame; + u8 start_addr = 0; + u8 *start_ptr = &start_addr; + u8 **start_buff = &start_ptr; + u16 buff_len = 0; + + ibqp = nes_get_qp(cm_id->device, conn_param->qpn); + if (!ibqp) + return -EINVAL; + + /* get all our handles */ + nesqp = to_nesqp(ibqp); + nesvnic = to_nesvnic(nesqp->ibqp.device); + nesdev = nesvnic->nesdev; + adapter = nesdev->nesadapter; + + cm_node = (struct nes_cm_node *)cm_id->provider_data; + nes_debug(NES_DBG_CM, "nes_accept: cm_node= %p nesvnic=%p, netdev=%p," + "%s\n", cm_node, nesvnic, nesvnic->netdev, + nesvnic->netdev->name); + + if (NES_CM_STATE_LISTENER_DESTROYED == cm_node->state) { + if (cm_node->loopbackpartner) + rem_ref_cm_node(cm_node->cm_core, cm_node->loopbackpartner); + rem_ref_cm_node(cm_node->cm_core, cm_node); + return -EINVAL; + } + + passive_state = atomic_add_return(1, &cm_node->passive_state); + if (passive_state == NES_SEND_RESET_EVENT) { + rem_ref_cm_node(cm_node->cm_core, cm_node); + return -ECONNRESET; + } + + /* associate the node with the QP */ + nesqp->cm_node = (void *)cm_node; + cm_node->nesqp = nesqp; + + nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n", + nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener); + atomic_inc(&cm_accepts); + + nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", + netdev_refcnt_read(nesvnic->netdev)); + + nesqp->ietf_frame_size = sizeof(struct ietf_mpa_v2); + /* allocate the ietf frame and space for private data */ + nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev, + nesqp->ietf_frame_size + conn_param->private_data_len, + &nesqp->ietf_frame_pbase); + + if (!nesqp->ietf_frame) { + nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n"); + return -ENOMEM; + } + mpa_v2_frame = (struct ietf_mpa_v2 *)nesqp->ietf_frame; + + if (cm_node->mpa_frame_rev == IETF_MPA_V1) + mpa_frame_offset = 4; + + memcpy(mpa_v2_frame->priv_data, conn_param->private_data, + conn_param->private_data_len); + + cm_build_mpa_frame(cm_node, start_buff, &buff_len, nesqp->ietf_frame, MPA_KEY_REPLY); + nesqp->private_data_len = conn_param->private_data_len; + + /* setup our first outgoing iWarp send WQE (the IETF frame response) */ + wqe = &nesqp->hwqp.sq_vbase[0]; + + if (cm_id->remote_addr.sin_addr.s_addr != + cm_id->local_addr.sin_addr.s_addr) { + u64temp = (unsigned long)nesqp; + nesibdev = nesvnic->nesibdev; + nespd = nesqp->nespd; + ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset; + ibphysbuf.size = buff_len; + tagged_offset = (u64)(unsigned long)*start_buff; + ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd, + &ibphysbuf, 1, + IB_ACCESS_LOCAL_WRITE, + &tagged_offset); + if (!ibmr) { + nes_debug(NES_DBG_CM, "Unable to register memory region" + "for lSMM for cm_node = %p \n", + cm_node); + pci_free_consistent(nesdev->pcidev, + nesqp->private_data_len + nesqp->ietf_frame_size, + nesqp->ietf_frame, nesqp->ietf_frame_pbase); + return -ENOMEM; + } + + ibmr->pd = &nespd->ibpd; + ibmr->device = nespd->ibpd.device; + nesqp->lsmm_mr = ibmr; + + u64temp |= NES_SW_CONTEXT_ALIGN >> 1; + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, + u64temp); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | + NES_IWARP_SQ_WQE_WRPDU); + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = + cpu_to_le32(buff_len); + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, + (u64)(unsigned long)(*start_buff)); + wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = + cpu_to_le32(buff_len); + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey; + if (nesqp->sq_kmapped) { + nesqp->sq_kmapped = 0; + kunmap(nesqp->page); + } + + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU); + } else { + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32(NES_QPCONTEXT_ORDIRD_WRPDU); + } + nesqp->skip_lsmm = 1; + + + /* Cache the cm_id in the qp */ + nesqp->cm_id = cm_id; + cm_node->cm_id = cm_id; + + /* nesqp->cm_node = (void *)cm_id->provider_data; */ + cm_id->provider_data = nesqp; + nesqp->active_conn = 0; + + if (cm_node->state == NES_CM_STATE_TSA) + nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n", + cm_node); + + nes_cm_init_tsa_conn(nesqp, cm_node); + + nesqp->nesqp_context->tcpPorts[0] = + cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); + nesqp->nesqp_context->tcpPorts[1] = + cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); + + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(nesvnic->local_ipaddr)); + else + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); + + nesqp->nesqp_context->misc2 |= cpu_to_le32( + (u32)PCI_FUNC(nesdev->pcidev->devfn) << + NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); + + nesqp->nesqp_context->arp_index_vlan |= + cpu_to_le32(nes_arp_table(nesdev, + le32_to_cpu(nesqp->nesqp_context->ip0), NULL, + NES_ARP_RESOLVE) << 16); + + nesqp->nesqp_context->ts_val_delta = cpu_to_le32( + jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); + + nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id); + + nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( + ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT)); + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32((u32)conn_param->ord); + + memset(&nes_quad, 0, sizeof(nes_quad)); + nes_quad.DstIpAdrIndex = + cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nes_quad.SrcIpadr = nesvnic->local_ipaddr; + else + nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; + nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; + nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; + + /* Produce hash key */ + crc_value = get_crc_value(&nes_quad); + nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); + nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n", + nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); + + nesqp->hte_index &= adapter->hte_index_mask; + nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); + + cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); + + nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = " + "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + " + "private data length=%u.\n", nesqp->hwqp.qp_id, + ntohl(cm_id->remote_addr.sin_addr.s_addr), + ntohs(cm_id->remote_addr.sin_port), + ntohl(cm_id->local_addr.sin_addr.s_addr), + ntohs(cm_id->local_addr.sin_port), + le32_to_cpu(nesqp->nesqp_context->rcv_nxt), + le32_to_cpu(nesqp->nesqp_context->snd_nxt), + buff_len); + + /* notify OF layer that accept event was successful */ + cm_id->add_ref(cm_id); + nes_add_ref(&nesqp->ibqp); + + cm_event.event = IW_CM_EVENT_ESTABLISHED; + cm_event.status = 0; + cm_event.provider_data = (void *)nesqp; + cm_event.local_addr = cm_id->local_addr; + cm_event.remote_addr = cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + ret = cm_id->event_handler(cm_id, &cm_event); + attr.qp_state = IB_QPS_RTS; + nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); + if (cm_node->loopbackpartner) { + cm_node->loopbackpartner->mpa_frame_size = + nesqp->private_data_len; + /* copy entire MPA frame to our cm_node's frame */ + memcpy(cm_node->loopbackpartner->mpa_frame_buf, + conn_param->private_data, conn_param->private_data_len); + create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED); + } + if (ret) + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " + "ret=%d\n", __func__, __LINE__, ret); + + return 0; +} + + +/** + * nes_reject + */ +int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ + struct nes_cm_node *cm_node; + struct nes_cm_node *loopback; + struct nes_cm_core *cm_core; + u8 *start_buff; + + atomic_inc(&cm_rejects); + cm_node = (struct nes_cm_node *)cm_id->provider_data; + loopback = cm_node->loopbackpartner; + cm_core = cm_node->cm_core; + cm_node->cm_id = cm_id; + + if (pdata_len + sizeof(struct ietf_mpa_v2) > MAX_CM_BUFFER) + return -EINVAL; + + if (loopback) { + memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len); + loopback->mpa_frame.priv_data_len = pdata_len; + loopback->mpa_frame_size = pdata_len; + } else { + start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2); + cm_node->mpa_frame_size = pdata_len; + memcpy(start_buff, pdata, pdata_len); + } + return cm_core->api->reject(cm_core, cm_node); +} + + +/** + * nes_connect + * setup and launch cm connect node + */ +int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + struct ib_qp *ibqp; + struct nes_qp *nesqp; + struct nes_vnic *nesvnic; + struct nes_device *nesdev; + struct nes_cm_node *cm_node; + struct nes_cm_info cm_info; + int apbvt_set = 0; + + ibqp = nes_get_qp(cm_id->device, conn_param->qpn); + if (!ibqp) + return -EINVAL; + nesqp = to_nesqp(ibqp); + if (!nesqp) + return -EINVAL; + nesvnic = to_nesvnic(nesqp->ibqp.device); + if (!nesvnic) + return -EINVAL; + nesdev = nesvnic->nesdev; + if (!nesdev) + return -EINVAL; + + if (!(cm_id->local_addr.sin_port) || !(cm_id->remote_addr.sin_port)) + return -EINVAL; + + nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = " + "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id, + ntohl(nesvnic->local_ipaddr), + ntohl(cm_id->remote_addr.sin_addr.s_addr), + ntohs(cm_id->remote_addr.sin_port), + ntohl(cm_id->local_addr.sin_addr.s_addr), + ntohs(cm_id->local_addr.sin_port)); + + atomic_inc(&cm_connects); + nesqp->active_conn = 1; + + /* cache the cm_id in the qp */ + nesqp->cm_id = cm_id; + + cm_id->provider_data = nesqp; + + nesqp->private_data_len = conn_param->private_data_len; + nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); + nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord); + nes_debug(NES_DBG_CM, "mpa private data len =%u\n", + conn_param->private_data_len); + + if (cm_id->local_addr.sin_addr.s_addr != + cm_id->remote_addr.sin_addr.s_addr) { + nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), + PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + apbvt_set = 1; + } + + /* set up the connection params for the node */ + cm_info.loc_addr = htonl(cm_id->local_addr.sin_addr.s_addr); + cm_info.loc_port = htons(cm_id->local_addr.sin_port); + cm_info.rem_addr = htonl(cm_id->remote_addr.sin_addr.s_addr); + cm_info.rem_port = htons(cm_id->remote_addr.sin_port); + cm_info.cm_id = cm_id; + cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; + + cm_id->add_ref(cm_id); + + /* create a connect CM node connection */ + cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, + conn_param->private_data_len, (void *)conn_param->private_data, + &cm_info); + if (!cm_node) { + if (apbvt_set) + nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), + PCI_FUNC(nesdev->pcidev->devfn), + NES_MANAGE_APBVT_DEL); + + cm_id->rem_ref(cm_id); + return -ENOMEM; + } + + cm_node->apbvt_set = apbvt_set; + nesqp->cm_node = cm_node; + cm_node->nesqp = nesqp; + nes_add_ref(&nesqp->ibqp); + + return 0; +} + + +/** + * nes_create_listen + */ +int nes_create_listen(struct iw_cm_id *cm_id, int backlog) +{ + struct nes_vnic *nesvnic; + struct nes_cm_listener *cm_node; + struct nes_cm_info cm_info; + int err; + + nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n", + cm_id, ntohs(cm_id->local_addr.sin_port)); + + nesvnic = to_nesvnic(cm_id->device); + if (!nesvnic) + return -EINVAL; + + nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n", + nesvnic, nesvnic->netdev, nesvnic->netdev->name); + + nes_debug(NES_DBG_CM, "nesvnic->local_ipaddr=0x%08x, sin_addr.s_addr=0x%08x\n", + nesvnic->local_ipaddr, cm_id->local_addr.sin_addr.s_addr); + + /* setup listen params in our api call struct */ + cm_info.loc_addr = nesvnic->local_ipaddr; + cm_info.loc_port = cm_id->local_addr.sin_port; + cm_info.backlog = backlog; + cm_info.cm_id = cm_id; + + cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; + + + cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info); + if (!cm_node) { + printk(KERN_ERR "%s[%u] Error returned from listen API call\n", + __func__, __LINE__); + return -ENOMEM; + } + + cm_id->provider_data = cm_node; + + if (!cm_node->reused_node) { + err = nes_manage_apbvt(nesvnic, + ntohs(cm_id->local_addr.sin_port), + PCI_FUNC(nesvnic->nesdev->pcidev->devfn), + NES_MANAGE_APBVT_ADD); + if (err) { + printk(KERN_ERR "nes_manage_apbvt call returned %d.\n", + err); + g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node); + return err; + } + atomic_inc(&cm_listens_created); + } + + cm_id->add_ref(cm_id); + cm_id->provider_data = (void *)cm_node; + + + return 0; +} + + +/** + * nes_destroy_listen + */ +int nes_destroy_listen(struct iw_cm_id *cm_id) +{ + if (cm_id->provider_data) + g_cm_core->api->stop_listener(g_cm_core, cm_id->provider_data); + else + nes_debug(NES_DBG_CM, "cm_id->provider_data was NULL\n"); + + cm_id->rem_ref(cm_id); + + return 0; +} + + +/** + * nes_cm_recv + */ +int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice) +{ + int rc = 0; + + cm_packets_received++; + if ((g_cm_core) && (g_cm_core->api)) + rc = g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb); + else + nes_debug(NES_DBG_CM, "Unable to process packet for CM," + " cm is not setup properly.\n"); + + return rc; +} + + +/** + * nes_cm_start + * Start and init a cm core module + */ +int nes_cm_start(void) +{ + nes_debug(NES_DBG_CM, "\n"); + /* create the primary CM core, pass this handle to subsequent core inits */ + g_cm_core = nes_cm_alloc_core(); + if (g_cm_core) + return 0; + else + return -ENOMEM; +} + + +/** + * nes_cm_stop + * stop and dealloc all cm core instances + */ +int nes_cm_stop(void) +{ + g_cm_core->api->destroy_cm_core(g_cm_core); + return 0; +} + + +/** + * cm_event_connected + * handle a connected event, setup QPs and HW + */ +static void cm_event_connected(struct nes_cm_event *event) +{ + struct nes_qp *nesqp; + struct nes_vnic *nesvnic; + struct nes_device *nesdev; + struct nes_cm_node *cm_node; + struct nes_adapter *nesadapter; + struct ib_qp_attr attr; + struct iw_cm_id *cm_id; + struct iw_cm_event cm_event; + struct nes_v4_quad nes_quad; + u32 crc_value; + int ret; + + /* get all our handles */ + cm_node = event->cm_node; + cm_id = cm_node->cm_id; + nes_debug(NES_DBG_CM, "cm_event_connected - %p - cm_id = %p\n", cm_node, cm_id); + nesqp = (struct nes_qp *)cm_id->provider_data; + nesvnic = to_nesvnic(nesqp->ibqp.device); + nesdev = nesvnic->nesdev; + nesadapter = nesdev->nesadapter; + + if (nesqp->destroyed) + return; + atomic_inc(&cm_connecteds); + nes_debug(NES_DBG_CM, "QP%u attempting to connect to 0x%08X:0x%04X on" + " local port 0x%04X. jiffies = %lu.\n", + nesqp->hwqp.qp_id, + ntohl(cm_id->remote_addr.sin_addr.s_addr), + ntohs(cm_id->remote_addr.sin_port), + ntohs(cm_id->local_addr.sin_port), + jiffies); + + nes_cm_init_tsa_conn(nesqp, cm_node); + + /* set the QP tsa context */ + nesqp->nesqp_context->tcpPorts[0] = + cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); + nesqp->nesqp_context->tcpPorts[1] = + cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(nesvnic->local_ipaddr)); + else + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); + + nesqp->nesqp_context->misc2 |= cpu_to_le32( + (u32)PCI_FUNC(nesdev->pcidev->devfn) << + NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); + nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32( + nes_arp_table(nesdev, + le32_to_cpu(nesqp->nesqp_context->ip0), + NULL, NES_ARP_RESOLVE) << 16); + nesqp->nesqp_context->ts_val_delta = cpu_to_le32( + jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); + nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id); + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32((u32)1 << + NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); + + /* Adjust tail for not having a LSMM */ + /*nesqp->hwqp.sq_tail = 1;*/ + + build_rdma0_msg(cm_node, &nesqp); + + nes_write32(nesdev->regs + NES_WQE_ALLOC, + (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); + + memset(&nes_quad, 0, sizeof(nes_quad)); + + nes_quad.DstIpAdrIndex = + cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nes_quad.SrcIpadr = nesvnic->local_ipaddr; + else + nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; + nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; + nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; + + /* Produce hash key */ + crc_value = get_crc_value(&nes_quad); + nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); + nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n", + nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask); + + nesqp->hte_index &= nesadapter->hte_index_mask; + nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); + + nesqp->ietf_frame = &cm_node->mpa_frame; + nesqp->private_data_len = (u8)cm_node->mpa_frame_size; + cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); + + /* notify OF layer we successfully created the requested connection */ + cm_event.event = IW_CM_EVENT_CONNECT_REPLY; + cm_event.status = 0; + cm_event.provider_data = cm_id->provider_data; + cm_event.local_addr.sin_family = AF_INET; + cm_event.local_addr.sin_port = cm_id->local_addr.sin_port; + cm_event.remote_addr = cm_id->remote_addr; + + cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; + cm_event.private_data_len = (u8)event->cm_node->mpa_frame_size; + cm_event.ird = cm_node->ird_size; + cm_event.ord = cm_node->ord_size; + + cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr; + ret = cm_id->event_handler(cm_id, &cm_event); + nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); + + if (ret) + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " + "ret=%d\n", __func__, __LINE__, ret); + attr.qp_state = IB_QPS_RTS; + nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); + + nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = " + "%lu\n", nesqp->hwqp.qp_id, jiffies); + + return; +} + + +/** + * cm_event_connect_error + */ +static void cm_event_connect_error(struct nes_cm_event *event) +{ + struct nes_qp *nesqp; + struct iw_cm_id *cm_id; + struct iw_cm_event cm_event; + /* struct nes_cm_info cm_info; */ + int ret; + + if (!event->cm_node) + return; + + cm_id = event->cm_node->cm_id; + if (!cm_id) + return; + + nes_debug(NES_DBG_CM, "cm_node=%p, cm_id=%p\n", event->cm_node, cm_id); + nesqp = cm_id->provider_data; + + if (!nesqp) + return; + + /* notify OF layer about this connection error event */ + /* cm_id->rem_ref(cm_id); */ + nesqp->cm_id = NULL; + cm_id->provider_data = NULL; + cm_event.event = IW_CM_EVENT_CONNECT_REPLY; + cm_event.status = -ECONNRESET; + cm_event.provider_data = cm_id->provider_data; + cm_event.local_addr = cm_id->local_addr; + cm_event.remote_addr = cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + + nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, " + "remove_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr, + cm_event.remote_addr.sin_addr.s_addr); + + ret = cm_id->event_handler(cm_id, &cm_event); + nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); + if (ret) + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " + "ret=%d\n", __func__, __LINE__, ret); + cm_id->rem_ref(cm_id); + + rem_ref_cm_node(event->cm_node->cm_core, event->cm_node); + return; +} + + +/** + * cm_event_reset + */ +static void cm_event_reset(struct nes_cm_event *event) +{ + struct nes_qp *nesqp; + struct iw_cm_id *cm_id; + struct iw_cm_event cm_event; + /* struct nes_cm_info cm_info; */ + int ret; + + if (!event->cm_node) + return; + + if (!event->cm_node->cm_id) + return; + + cm_id = event->cm_node->cm_id; + + nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id); + nesqp = cm_id->provider_data; + if (!nesqp) + return; + + nesqp->cm_id = NULL; + /* cm_id->provider_data = NULL; */ + cm_event.event = IW_CM_EVENT_DISCONNECT; + cm_event.status = -ECONNRESET; + cm_event.provider_data = cm_id->provider_data; + cm_event.local_addr = cm_id->local_addr; + cm_event.remote_addr = cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + + cm_id->add_ref(cm_id); + ret = cm_id->event_handler(cm_id, &cm_event); + atomic_inc(&cm_closes); + cm_event.event = IW_CM_EVENT_CLOSE; + cm_event.status = 0; + cm_event.provider_data = cm_id->provider_data; + cm_event.local_addr = cm_id->local_addr; + cm_event.remote_addr = cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + nes_debug(NES_DBG_CM, "NODE %p Generating CLOSE\n", event->cm_node); + ret = cm_id->event_handler(cm_id, &cm_event); + + nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); + + + /* notify OF layer about this connection error event */ + cm_id->rem_ref(cm_id); + + return; +} + + +/** + * cm_event_mpa_req + */ +static void cm_event_mpa_req(struct nes_cm_event *event) +{ + struct iw_cm_id *cm_id; + struct iw_cm_event cm_event; + int ret; + struct nes_cm_node *cm_node; + + cm_node = event->cm_node; + if (!cm_node) + return; + cm_id = cm_node->cm_id; + + atomic_inc(&cm_connect_reqs); + nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n", + cm_node, cm_id, jiffies); + + cm_event.event = IW_CM_EVENT_CONNECT_REQUEST; + cm_event.status = 0; + cm_event.provider_data = (void *)cm_node; + + cm_event.local_addr.sin_family = AF_INET; + cm_event.local_addr.sin_port = htons(event->cm_info.loc_port); + cm_event.local_addr.sin_addr.s_addr = htonl(event->cm_info.loc_addr); + + cm_event.remote_addr.sin_family = AF_INET; + cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port); + cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr); + cm_event.private_data = cm_node->mpa_frame_buf; + cm_event.private_data_len = (u8)cm_node->mpa_frame_size; + cm_event.ird = cm_node->ird_size; + cm_event.ord = cm_node->ord_size; + + ret = cm_id->event_handler(cm_id, &cm_event); + if (ret) + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n", + __func__, __LINE__, ret); + return; +} + + +static void cm_event_mpa_reject(struct nes_cm_event *event) +{ + struct iw_cm_id *cm_id; + struct iw_cm_event cm_event; + struct nes_cm_node *cm_node; + int ret; + + cm_node = event->cm_node; + if (!cm_node) + return; + cm_id = cm_node->cm_id; + + atomic_inc(&cm_connect_reqs); + nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n", + cm_node, cm_id, jiffies); + + cm_event.event = IW_CM_EVENT_CONNECT_REPLY; + cm_event.status = -ECONNREFUSED; + cm_event.provider_data = cm_id->provider_data; + + cm_event.local_addr.sin_family = AF_INET; + cm_event.local_addr.sin_port = htons(event->cm_info.loc_port); + cm_event.local_addr.sin_addr.s_addr = htonl(event->cm_info.loc_addr); + + cm_event.remote_addr.sin_family = AF_INET; + cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port); + cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr); + + cm_event.private_data = cm_node->mpa_frame_buf; + cm_event.private_data_len = (u8)cm_node->mpa_frame_size; + + nes_debug(NES_DBG_CM, "call CM_EVENT_MPA_REJECTED, local_addr=%08x, " + "remove_addr=%08x\n", + cm_event.local_addr.sin_addr.s_addr, + cm_event.remote_addr.sin_addr.s_addr); + + ret = cm_id->event_handler(cm_id, &cm_event); + if (ret) + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n", + __func__, __LINE__, ret); + + return; +} + + +static void nes_cm_event_handler(struct work_struct *); + +/** + * nes_cm_post_event + * post an event to the cm event handler + */ +static int nes_cm_post_event(struct nes_cm_event *event) +{ + atomic_inc(&event->cm_node->cm_core->events_posted); + add_ref_cm_node(event->cm_node); + event->cm_info.cm_id->add_ref(event->cm_info.cm_id); + INIT_WORK(&event->event_work, nes_cm_event_handler); + nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n", + event->cm_node, event); + + queue_work(event->cm_node->cm_core->event_wq, &event->event_work); + + nes_debug(NES_DBG_CM, "Exit\n"); + return 0; +} + + +/** + * nes_cm_event_handler + * worker function to handle cm events + * will free instance of nes_cm_event + */ +static void nes_cm_event_handler(struct work_struct *work) +{ + struct nes_cm_event *event = container_of(work, struct nes_cm_event, + event_work); + struct nes_cm_core *cm_core; + + if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core)) + return; + + cm_core = event->cm_node->cm_core; + nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n", + event, event->type, atomic_read(&cm_core->events_posted)); + + switch (event->type) { + case NES_CM_EVENT_MPA_REQ: + cm_event_mpa_req(event); + nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n", + event->cm_node); + break; + case NES_CM_EVENT_RESET: + nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n", + event->cm_node); + cm_event_reset(event); + break; + case NES_CM_EVENT_CONNECTED: + if ((!event->cm_node->cm_id) || + (event->cm_node->state != NES_CM_STATE_TSA)) + break; + cm_event_connected(event); + nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n"); + break; + case NES_CM_EVENT_MPA_REJECT: + if ((!event->cm_node->cm_id) || + (event->cm_node->state == NES_CM_STATE_TSA)) + break; + cm_event_mpa_reject(event); + nes_debug(NES_DBG_CM, "CM Event: REJECT\n"); + break; + + case NES_CM_EVENT_ABORTED: + if ((!event->cm_node->cm_id) || + (event->cm_node->state == NES_CM_STATE_TSA)) + break; + cm_event_connect_error(event); + nes_debug(NES_DBG_CM, "CM Event: ABORTED\n"); + break; + case NES_CM_EVENT_DROPPED_PKT: + nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n"); + break; + default: + nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n"); + break; + } + + atomic_dec(&cm_core->events_posted); + event->cm_info.cm_id->rem_ref(event->cm_info.cm_id); + rem_ref_cm_node(cm_core, event->cm_node); + kfree(event); + + return; +} diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h new file mode 100644 index 00000000..4646e666 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -0,0 +1,467 @@ +/* + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef NES_CM_H +#define NES_CM_H + +#define QUEUE_EVENTS + +#define NES_MANAGE_APBVT_DEL 0 +#define NES_MANAGE_APBVT_ADD 1 + +#define NES_MPA_REQUEST_ACCEPT 1 +#define NES_MPA_REQUEST_REJECT 2 + +/* IETF MPA -- defines, enums, structs */ +#define IEFT_MPA_KEY_REQ "MPA ID Req Frame" +#define IEFT_MPA_KEY_REP "MPA ID Rep Frame" +#define IETF_MPA_KEY_SIZE 16 +#define IETF_MPA_VERSION 1 +#define IETF_MAX_PRIV_DATA_LEN 512 +#define IETF_MPA_FRAME_SIZE 20 +#define IETF_RTR_MSG_SIZE 4 +#define IETF_MPA_V2_FLAG 0x10 + +/* IETF RTR MSG Fields */ +#define IETF_PEER_TO_PEER 0x8000 +#define IETF_FLPDU_ZERO_LEN 0x4000 +#define IETF_RDMA0_WRITE 0x8000 +#define IETF_RDMA0_READ 0x4000 +#define IETF_NO_IRD_ORD 0x3FFF + +enum ietf_mpa_flags { + IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */ + IETF_MPA_FLAGS_CRC = 0x40, /* receive Markers */ + IETF_MPA_FLAGS_REJECT = 0x20, /* Reject */ +}; + +struct ietf_mpa_v1 { + u8 key[IETF_MPA_KEY_SIZE]; + u8 flags; + u8 rev; + __be16 priv_data_len; + u8 priv_data[0]; +}; + +#define ietf_mpa_req_resp_frame ietf_mpa_frame + +struct ietf_rtr_msg { + __be16 ctrl_ird; + __be16 ctrl_ord; +}; + +struct ietf_mpa_v2 { + u8 key[IETF_MPA_KEY_SIZE]; + u8 flags; + u8 rev; + __be16 priv_data_len; + struct ietf_rtr_msg rtr_msg; + u8 priv_data[0]; +}; + +struct nes_v4_quad { + u32 rsvd0; + __le32 DstIpAdrIndex; /* Only most significant 5 bits are valid */ + __be32 SrcIpadr; + __be16 TcpPorts[2]; /* src is low, dest is high */ +}; + +struct nes_cm_node; +enum nes_timer_type { + NES_TIMER_TYPE_SEND, + NES_TIMER_TYPE_RECV, + NES_TIMER_NODE_CLEANUP, + NES_TIMER_TYPE_CLOSE, +}; + +#define NES_PASSIVE_STATE_INDICATED 0 +#define NES_DO_NOT_SEND_RESET_EVENT 1 +#define NES_SEND_RESET_EVENT 2 + +#define MAX_NES_IFS 4 + +#define SET_ACK 1 +#define SET_SYN 2 +#define SET_FIN 4 +#define SET_RST 8 + +#define TCP_OPTIONS_PADDING 3 + +struct option_base { + u8 optionnum; + u8 length; +}; + +enum option_numbers { + OPTION_NUMBER_END, + OPTION_NUMBER_NONE, + OPTION_NUMBER_MSS, + OPTION_NUMBER_WINDOW_SCALE, + OPTION_NUMBER_SACK_PERM, + OPTION_NUMBER_SACK, + OPTION_NUMBER_WRITE0 = 0xbc +}; + +struct option_mss { + u8 optionnum; + u8 length; + __be16 mss; +}; + +struct option_windowscale { + u8 optionnum; + u8 length; + u8 shiftcount; +}; + +union all_known_options { + char as_end; + struct option_base as_base; + struct option_mss as_mss; + struct option_windowscale as_windowscale; +}; + +struct nes_timer_entry { + struct list_head list; + unsigned long timetosend; /* jiffies */ + struct sk_buff *skb; + u32 type; + u32 retrycount; + u32 retranscount; + u32 context; + u32 seq_num; + u32 send_retrans; + int close_when_complete; + struct net_device *netdev; +}; + +#define NES_DEFAULT_RETRYS 64 +#define NES_DEFAULT_RETRANS 8 +#ifdef CONFIG_INFINIBAND_NES_DEBUG +#define NES_RETRY_TIMEOUT (1000*HZ/1000) +#else +#define NES_RETRY_TIMEOUT (3000*HZ/1000) +#endif +#define NES_SHORT_TIME (10) +#define NES_LONG_TIME (2000*HZ/1000) +#define NES_MAX_TIMEOUT ((unsigned long) (12*HZ)) + +#define NES_CM_HASHTABLE_SIZE 1024 +#define NES_CM_TCP_TIMER_INTERVAL 3000 +#define NES_CM_DEFAULT_MTU 1540 +#define NES_CM_DEFAULT_FRAME_CNT 10 +#define NES_CM_THREAD_STACK_SIZE 256 +#define NES_CM_DEFAULT_RCV_WND 64240 // before we know that window scaling is allowed +#define NES_CM_DEFAULT_RCV_WND_SCALED 256960 // after we know that window scaling is allowed +#define NES_CM_DEFAULT_RCV_WND_SCALE 2 +#define NES_CM_DEFAULT_FREE_PKTS 0x000A +#define NES_CM_FREE_PKT_LO_WATERMARK 2 + +#define NES_CM_DEFAULT_MSS 536 + +#define NES_CM_DEF_SEQ 0x159bf75f +#define NES_CM_DEF_LOCAL_ID 0x3b47 + +#define NES_CM_DEF_SEQ2 0x18ed5740 +#define NES_CM_DEF_LOCAL_ID2 0xb807 +#define MAX_CM_BUFFER (IETF_MPA_FRAME_SIZE + IETF_RTR_MSG_SIZE + IETF_MAX_PRIV_DATA_LEN) + +typedef u32 nes_addr_t; + +#define nes_cm_tsa_context nes_qp_context + +struct nes_qp; + +/* cm node transition states */ +enum nes_cm_node_state { + NES_CM_STATE_UNKNOWN, + NES_CM_STATE_INITED, + NES_CM_STATE_LISTENING, + NES_CM_STATE_SYN_RCVD, + NES_CM_STATE_SYN_SENT, + NES_CM_STATE_ONE_SIDE_ESTABLISHED, + NES_CM_STATE_ESTABLISHED, + NES_CM_STATE_ACCEPTING, + NES_CM_STATE_MPAREQ_SENT, + NES_CM_STATE_MPAREQ_RCVD, + NES_CM_STATE_MPAREJ_RCVD, + NES_CM_STATE_TSA, + NES_CM_STATE_FIN_WAIT1, + NES_CM_STATE_FIN_WAIT2, + NES_CM_STATE_CLOSE_WAIT, + NES_CM_STATE_TIME_WAIT, + NES_CM_STATE_LAST_ACK, + NES_CM_STATE_CLOSING, + NES_CM_STATE_LISTENER_DESTROYED, + NES_CM_STATE_CLOSED +}; + +enum mpa_frame_version { + IETF_MPA_V1 = 1, + IETF_MPA_V2 = 2 +}; + +enum mpa_frame_key { + MPA_KEY_REQUEST, + MPA_KEY_REPLY +}; + +enum send_rdma0 { + SEND_RDMA_READ_ZERO = 1, + SEND_RDMA_WRITE_ZERO = 2 +}; + +enum nes_tcpip_pkt_type { + NES_PKT_TYPE_UNKNOWN, + NES_PKT_TYPE_SYN, + NES_PKT_TYPE_SYNACK, + NES_PKT_TYPE_ACK, + NES_PKT_TYPE_FIN, + NES_PKT_TYPE_RST +}; + + +/* type of nes connection */ +enum nes_cm_conn_type { + NES_CM_IWARP_CONN_TYPE, +}; + +/* CM context params */ +struct nes_cm_tcp_context { + u8 client; + + u32 loc_seq_num; + u32 loc_ack_num; + u32 rem_ack_num; + u32 rcv_nxt; + + u32 loc_id; + u32 rem_id; + + u32 snd_wnd; + u32 max_snd_wnd; + + u32 rcv_wnd; + u32 mss; + u8 snd_wscale; + u8 rcv_wscale; + + struct nes_cm_tsa_context tsa_cntxt; + struct timeval sent_ts; +}; + + +enum nes_cm_listener_state { + NES_CM_LISTENER_PASSIVE_STATE = 1, + NES_CM_LISTENER_ACTIVE_STATE = 2, + NES_CM_LISTENER_EITHER_STATE = 3 +}; + +struct nes_cm_listener { + struct list_head list; + struct nes_cm_core *cm_core; + u8 loc_mac[ETH_ALEN]; + nes_addr_t loc_addr; + u16 loc_port; + struct iw_cm_id *cm_id; + enum nes_cm_conn_type conn_type; + atomic_t ref_count; + struct nes_vnic *nesvnic; + atomic_t pend_accepts_cnt; + int backlog; + enum nes_cm_listener_state listener_state; + u32 reused_node; +}; + +/* per connection node and node state information */ +struct nes_cm_node { + nes_addr_t loc_addr, rem_addr; + u16 loc_port, rem_port; + + u8 loc_mac[ETH_ALEN]; + u8 rem_mac[ETH_ALEN]; + + enum nes_cm_node_state state; + struct nes_cm_tcp_context tcp_cntxt; + struct nes_cm_core *cm_core; + struct sk_buff_head resend_list; + atomic_t ref_count; + struct net_device *netdev; + + struct nes_cm_node *loopbackpartner; + + struct nes_timer_entry *send_entry; + struct nes_timer_entry *recv_entry; + spinlock_t retrans_list_lock; + enum send_rdma0 send_rdma0_op; + + union { + struct ietf_mpa_v1 mpa_frame; + struct ietf_mpa_v2 mpa_v2_frame; + u8 mpa_frame_buf[MAX_CM_BUFFER]; + }; + enum mpa_frame_version mpa_frame_rev; + u16 ird_size; + u16 ord_size; + + u16 mpa_frame_size; + struct iw_cm_id *cm_id; + struct list_head list; + int accelerated; + struct nes_cm_listener *listener; + enum nes_cm_conn_type conn_type; + struct nes_vnic *nesvnic; + int apbvt_set; + int accept_pend; + struct list_head timer_entry; + struct list_head reset_entry; + struct nes_qp *nesqp; + atomic_t passive_state; +}; + +/* structure for client or CM to fill when making CM api calls. */ +/* - only need to set relevant data, based on op. */ +struct nes_cm_info { + union { + struct iw_cm_id *cm_id; + struct net_device *netdev; + }; + + u16 loc_port; + u16 rem_port; + nes_addr_t loc_addr; + nes_addr_t rem_addr; + + enum nes_cm_conn_type conn_type; + int backlog; +}; + +/* CM event codes */ +enum nes_cm_event_type { + NES_CM_EVENT_UNKNOWN, + NES_CM_EVENT_ESTABLISHED, + NES_CM_EVENT_MPA_REQ, + NES_CM_EVENT_MPA_CONNECT, + NES_CM_EVENT_MPA_ACCEPT, + NES_CM_EVENT_MPA_REJECT, + NES_CM_EVENT_MPA_ESTABLISHED, + NES_CM_EVENT_CONNECTED, + NES_CM_EVENT_CLOSED, + NES_CM_EVENT_RESET, + NES_CM_EVENT_DROPPED_PKT, + NES_CM_EVENT_CLOSE_IMMED, + NES_CM_EVENT_CLOSE_HARD, + NES_CM_EVENT_CLOSE_CLEAN, + NES_CM_EVENT_ABORTED, + NES_CM_EVENT_SEND_FIRST +}; + +/* event to post to CM event handler */ +struct nes_cm_event { + enum nes_cm_event_type type; + + struct nes_cm_info cm_info; + struct work_struct event_work; + struct nes_cm_node *cm_node; +}; + +struct nes_cm_core { + enum nes_cm_node_state state; + + atomic_t listen_node_cnt; + struct nes_cm_node listen_list; + spinlock_t listen_list_lock; + + u32 mtu; + u32 free_tx_pkt_max; + u32 rx_pkt_posted; + atomic_t ht_node_cnt; + struct list_head connected_nodes; + /* struct list_head hashtable[NES_CM_HASHTABLE_SIZE]; */ + spinlock_t ht_lock; + + struct timer_list tcp_timer; + + struct nes_cm_ops *api; + + int (*post_event)(struct nes_cm_event *event); + atomic_t events_posted; + struct workqueue_struct *event_wq; + struct workqueue_struct *disconn_wq; + + atomic_t node_cnt; + u64 aborted_connects; + u32 options; + + struct nes_cm_node *current_listen_node; +}; + + +#define NES_CM_SET_PKT_SIZE (1 << 1) +#define NES_CM_SET_FREE_PKT_Q_SIZE (1 << 2) + +/* CM ops/API for client interface */ +struct nes_cm_ops { + int (*accelerated)(struct nes_cm_core *, struct nes_cm_node *); + struct nes_cm_listener * (*listen)(struct nes_cm_core *, struct nes_vnic *, + struct nes_cm_info *); + int (*stop_listener)(struct nes_cm_core *, struct nes_cm_listener *); + struct nes_cm_node * (*connect)(struct nes_cm_core *, + struct nes_vnic *, u16, void *, + struct nes_cm_info *); + int (*close)(struct nes_cm_core *, struct nes_cm_node *); + int (*accept)(struct nes_cm_core *, struct nes_cm_node *); + int (*reject)(struct nes_cm_core *, struct nes_cm_node *); + int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *, + struct sk_buff *); + int (*destroy_cm_core)(struct nes_cm_core *); + int (*get)(struct nes_cm_core *); + int (*set)(struct nes_cm_core *, u32, u32); +}; + +int schedule_nes_timer(struct nes_cm_node *, struct sk_buff *, + enum nes_timer_type, int, int); + +int nes_accept(struct iw_cm_id *, struct iw_cm_conn_param *); +int nes_reject(struct iw_cm_id *, const void *, u8); +int nes_connect(struct iw_cm_id *, struct iw_cm_conn_param *); +int nes_create_listen(struct iw_cm_id *, int); +int nes_destroy_listen(struct iw_cm_id *); + +int nes_cm_recv(struct sk_buff *, struct net_device *); +int nes_cm_start(void); +int nes_cm_stop(void); +int nes_add_ref_cm_node(struct nes_cm_node *cm_node); +int nes_rem_ref_cm_node(struct nes_cm_node *cm_node); + +#endif /* NES_CM_H */ diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h new file mode 100644 index 00000000..a69eef16 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_context.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef NES_CONTEXT_H +#define NES_CONTEXT_H + +struct nes_qp_context { + __le32 misc; + __le32 cqs; + __le32 sq_addr_low; + __le32 sq_addr_high; + __le32 rq_addr_low; + __le32 rq_addr_high; + __le32 misc2; + __le16 tcpPorts[2]; + __le32 ip0; + __le32 ip1; + __le32 ip2; + __le32 ip3; + __le32 mss; + __le32 arp_index_vlan; + __le32 tcp_state_flow_label; + __le32 pd_index_wscale; + __le32 keepalive; + u32 ts_recent; + u32 ts_age; + __le32 snd_nxt; + __le32 snd_wnd; + __le32 rcv_nxt; + __le32 rcv_wnd; + __le32 snd_max; + __le32 snd_una; + u32 srtt; + __le32 rttvar; + __le32 ssthresh; + __le32 cwnd; + __le32 snd_wl1; + __le32 snd_wl2; + __le32 max_snd_wnd; + __le32 ts_val_delta; + u32 retransmit; + u32 probe_cnt; + u32 hte_index; + __le32 q2_addr_low; + __le32 q2_addr_high; + __le32 ird_index; + u32 Rsvd3; + __le32 ird_ord_sizes; + u32 mrkr_offset; + __le32 aeq_token_low; + __le32 aeq_token_high; +}; + +/* QP Context Misc Field */ + +#define NES_QPCONTEXT_MISC_IWARP_VER_MASK 0x00000003 +#define NES_QPCONTEXT_MISC_IWARP_VER_SHIFT 0 +#define NES_QPCONTEXT_MISC_EFB_SIZE_MASK 0x000000C0 +#define NES_QPCONTEXT_MISC_EFB_SIZE_SHIFT 6 +#define NES_QPCONTEXT_MISC_RQ_SIZE_MASK 0x00000300 +#define NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT 8 +#define NES_QPCONTEXT_MISC_SQ_SIZE_MASK 0x00000c00 +#define NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT 10 +#define NES_QPCONTEXT_MISC_PCI_FCN_MASK 0x00007000 +#define NES_QPCONTEXT_MISC_PCI_FCN_SHIFT 12 +#define NES_QPCONTEXT_MISC_DUP_ACKS_MASK 0x00070000 +#define NES_QPCONTEXT_MISC_DUP_ACKS_SHIFT 16 + +enum nes_qp_context_misc_bits { + NES_QPCONTEXT_MISC_RX_WQE_SIZE = 0x00000004, + NES_QPCONTEXT_MISC_IPV4 = 0x00000008, + NES_QPCONTEXT_MISC_DO_NOT_FRAG = 0x00000010, + NES_QPCONTEXT_MISC_INSERT_VLAN = 0x00000020, + NES_QPCONTEXT_MISC_DROS = 0x00008000, + NES_QPCONTEXT_MISC_WSCALE = 0x00080000, + NES_QPCONTEXT_MISC_KEEPALIVE = 0x00100000, + NES_QPCONTEXT_MISC_TIMESTAMP = 0x00200000, + NES_QPCONTEXT_MISC_SACK = 0x00400000, + NES_QPCONTEXT_MISC_RDMA_WRITE_EN = 0x00800000, + NES_QPCONTEXT_MISC_RDMA_READ_EN = 0x01000000, + NES_QPCONTEXT_MISC_WBIND_EN = 0x10000000, + NES_QPCONTEXT_MISC_FAST_REGISTER_EN = 0x20000000, + NES_QPCONTEXT_MISC_PRIV_EN = 0x40000000, + NES_QPCONTEXT_MISC_NO_NAGLE = 0x80000000 +}; + +enum nes_qp_acc_wq_sizes { + HCONTEXT_TSA_WQ_SIZE_4 = 0, + HCONTEXT_TSA_WQ_SIZE_32 = 1, + HCONTEXT_TSA_WQ_SIZE_128 = 2, + HCONTEXT_TSA_WQ_SIZE_512 = 3 +}; + +/* QP Context Misc2 Fields */ +#define NES_QPCONTEXT_MISC2_TTL_MASK 0x000000ff +#define NES_QPCONTEXT_MISC2_TTL_SHIFT 0 +#define NES_QPCONTEXT_MISC2_HOP_LIMIT_MASK 0x000000ff +#define NES_QPCONTEXT_MISC2_HOP_LIMIT_SHIFT 0 +#define NES_QPCONTEXT_MISC2_LIMIT_MASK 0x00000300 +#define NES_QPCONTEXT_MISC2_LIMIT_SHIFT 8 +#define NES_QPCONTEXT_MISC2_NIC_INDEX_MASK 0x0000fc00 +#define NES_QPCONTEXT_MISC2_NIC_INDEX_SHIFT 10 +#define NES_QPCONTEXT_MISC2_SRC_IP_MASK 0x001f0000 +#define NES_QPCONTEXT_MISC2_SRC_IP_SHIFT 16 +#define NES_QPCONTEXT_MISC2_TOS_MASK 0xff000000 +#define NES_QPCONTEXT_MISC2_TOS_SHIFT 24 +#define NES_QPCONTEXT_MISC2_TRAFFIC_CLASS_MASK 0xff000000 +#define NES_QPCONTEXT_MISC2_TRAFFIC_CLASS_SHIFT 24 + +/* QP Context Tcp State/Flow Label Fields */ +#define NES_QPCONTEXT_TCPFLOW_FLOW_LABEL_MASK 0x000fffff +#define NES_QPCONTEXT_TCPFLOW_FLOW_LABEL_SHIFT 0 +#define NES_QPCONTEXT_TCPFLOW_TCP_STATE_MASK 0xf0000000 +#define NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT 28 + +enum nes_qp_tcp_state { + NES_QPCONTEXT_TCPSTATE_CLOSED = 1, + NES_QPCONTEXT_TCPSTATE_EST = 5, + NES_QPCONTEXT_TCPSTATE_TIME_WAIT = 11, +}; + +/* QP Context PD Index/wscale Fields */ +#define NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK 0x0000000f +#define NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT 0 +#define NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK 0x00000f00 +#define NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT 8 +#define NES_QPCONTEXT_PDWSCALE_PDINDEX_MASK 0xffff0000 +#define NES_QPCONTEXT_PDWSCALE_PDINDEX_SHIFT 16 + +/* QP Context Keepalive Fields */ +#define NES_QPCONTEXT_KEEPALIVE_DELTA_MASK 0x0000ffff +#define NES_QPCONTEXT_KEEPALIVE_DELTA_SHIFT 0 +#define NES_QPCONTEXT_KEEPALIVE_PROBE_CNT_MASK 0x00ff0000 +#define NES_QPCONTEXT_KEEPALIVE_PROBE_CNT_SHIFT 16 +#define NES_QPCONTEXT_KEEPALIVE_INTV_MASK 0xff000000 +#define NES_QPCONTEXT_KEEPALIVE_INTV_SHIFT 24 + +/* QP Context ORD/IRD Fields */ +#define NES_QPCONTEXT_ORDIRD_ORDSIZE_MASK 0x0000007f +#define NES_QPCONTEXT_ORDIRD_ORDSIZE_SHIFT 0 +#define NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK 0x00030000 +#define NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT 16 +#define NES_QPCONTEXT_ORDIRD_IWARP_MODE_MASK 0x30000000 +#define NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT 28 + +enum nes_ord_ird_bits { + NES_QPCONTEXT_ORDIRD_WRPDU = 0x02000000, + NES_QPCONTEXT_ORDIRD_LSMM_PRESENT = 0x04000000, + NES_QPCONTEXT_ORDIRD_ALSMM = 0x08000000, + NES_QPCONTEXT_ORDIRD_AAH = 0x40000000, + NES_QPCONTEXT_ORDIRD_RNMC = 0x80000000 +}; + +enum nes_iwarp_qp_state { + NES_QPCONTEXT_IWARP_STATE_NONEXIST = 0, + NES_QPCONTEXT_IWARP_STATE_IDLE = 1, + NES_QPCONTEXT_IWARP_STATE_RTS = 2, + NES_QPCONTEXT_IWARP_STATE_CLOSING = 3, + NES_QPCONTEXT_IWARP_STATE_TERMINATE = 5, + NES_QPCONTEXT_IWARP_STATE_ERROR = 6 +}; + + +#endif /* NES_CONTEXT_H */ diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c new file mode 100644 index 00000000..d42c9f43 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -0,0 +1,3944 @@ +/* + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nes.h" + +static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR; +module_param(nes_lro_max_aggr, uint, 0444); +MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation"); + +static int wide_ppm_offset; +module_param(wide_ppm_offset, int, 0644); +MODULE_PARM_DESC(wide_ppm_offset, "Increase CX4 interface clock ppm offset, 0=100ppm (default), 1=300ppm"); + +static u32 crit_err_count; +u32 int_mod_timer_init; +u32 int_mod_cq_depth_256; +u32 int_mod_cq_depth_128; +u32 int_mod_cq_depth_32; +u32 int_mod_cq_depth_24; +u32 int_mod_cq_depth_16; +u32 int_mod_cq_depth_4; +u32 int_mod_cq_depth_1; +static const u8 nes_max_critical_error_count = 100; +#include "nes_cm.h" + +static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq); +static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count); +static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, + struct nes_adapter *nesadapter, u8 OneG_Mode); +static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq); +static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq); +static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq); +static void nes_process_iwarp_aeqe(struct nes_device *nesdev, + struct nes_hw_aeqe *aeqe); +static void process_critical_error(struct nes_device *nesdev); +static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number); +static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode); +static void nes_terminate_timeout(unsigned long context); +static void nes_terminate_start_timer(struct nes_qp *nesqp); + +#ifdef CONFIG_INFINIBAND_NES_DEBUG +static unsigned char *nes_iwarp_state_str[] = { + "Non-Existent", + "Idle", + "RTS", + "Closing", + "RSVD1", + "Terminate", + "Error", + "RSVD2", +}; + +static unsigned char *nes_tcp_state_str[] = { + "Non-Existent", + "Closed", + "Listen", + "SYN Sent", + "SYN Rcvd", + "Established", + "Close Wait", + "FIN Wait 1", + "Closing", + "Last Ack", + "FIN Wait 2", + "Time Wait", + "RSVD1", + "RSVD2", + "RSVD3", + "RSVD4", +}; +#endif + +static inline void print_ip(struct nes_cm_node *cm_node) +{ + unsigned char *rem_addr; + if (cm_node) { + rem_addr = (unsigned char *)&cm_node->rem_addr; + printk(KERN_ERR PFX "Remote IP addr: %pI4\n", rem_addr); + } +} + +/** + * nes_nic_init_timer_defaults + */ +void nes_nic_init_timer_defaults(struct nes_device *nesdev, u8 jumbomode) +{ + unsigned long flags; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; + + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + + shared_timer->timer_in_use_min = NES_NIC_FAST_TIMER_LOW; + shared_timer->timer_in_use_max = NES_NIC_FAST_TIMER_HIGH; + if (jumbomode) { + shared_timer->threshold_low = DEFAULT_JUMBO_NES_QL_LOW; + shared_timer->threshold_target = DEFAULT_JUMBO_NES_QL_TARGET; + shared_timer->threshold_high = DEFAULT_JUMBO_NES_QL_HIGH; + } else { + shared_timer->threshold_low = DEFAULT_NES_QL_LOW; + shared_timer->threshold_target = DEFAULT_NES_QL_TARGET; + shared_timer->threshold_high = DEFAULT_NES_QL_HIGH; + } + + /* todo use netdev->mtu to set thresholds */ + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); +} + + +/** + * nes_nic_init_timer + */ +static void nes_nic_init_timer(struct nes_device *nesdev) +{ + unsigned long flags; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; + + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + + if (shared_timer->timer_in_use_old == 0) { + nesdev->deepcq_count = 0; + shared_timer->timer_direction_upward = 0; + shared_timer->timer_direction_downward = 0; + shared_timer->timer_in_use = NES_NIC_FAST_TIMER; + shared_timer->timer_in_use_old = 0; + + } + if (shared_timer->timer_in_use != shared_timer->timer_in_use_old) { + shared_timer->timer_in_use_old = shared_timer->timer_in_use; + nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, + 0x80000000 | ((u32)(shared_timer->timer_in_use*8))); + } + /* todo use netdev->mtu to set thresholds */ + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); +} + + +/** + * nes_nic_tune_timer + */ +static void nes_nic_tune_timer(struct nes_device *nesdev) +{ + unsigned long flags; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; + u16 cq_count = nesdev->currcq_count; + + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + + if (shared_timer->cq_count_old <= cq_count) + shared_timer->cq_direction_downward = 0; + else + shared_timer->cq_direction_downward++; + shared_timer->cq_count_old = cq_count; + if (shared_timer->cq_direction_downward > NES_NIC_CQ_DOWNWARD_TREND) { + if (cq_count <= shared_timer->threshold_low && + shared_timer->threshold_low > 4) { + shared_timer->threshold_low = shared_timer->threshold_low/2; + shared_timer->cq_direction_downward=0; + nesdev->currcq_count = 0; + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); + return; + } + } + + if (cq_count > 1) { + nesdev->deepcq_count += cq_count; + if (cq_count <= shared_timer->threshold_low) { /* increase timer gently */ + shared_timer->timer_direction_upward++; + shared_timer->timer_direction_downward = 0; + } else if (cq_count <= shared_timer->threshold_target) { /* balanced */ + shared_timer->timer_direction_upward = 0; + shared_timer->timer_direction_downward = 0; + } else if (cq_count <= shared_timer->threshold_high) { /* decrease timer gently */ + shared_timer->timer_direction_downward++; + shared_timer->timer_direction_upward = 0; + } else if (cq_count <= (shared_timer->threshold_high) * 2) { + shared_timer->timer_in_use -= 2; + shared_timer->timer_direction_upward = 0; + shared_timer->timer_direction_downward++; + } else { + shared_timer->timer_in_use -= 4; + shared_timer->timer_direction_upward = 0; + shared_timer->timer_direction_downward++; + } + + if (shared_timer->timer_direction_upward > 3 ) { /* using history */ + shared_timer->timer_in_use += 3; + shared_timer->timer_direction_upward = 0; + shared_timer->timer_direction_downward = 0; + } + if (shared_timer->timer_direction_downward > 5) { /* using history */ + shared_timer->timer_in_use -= 4 ; + shared_timer->timer_direction_downward = 0; + shared_timer->timer_direction_upward = 0; + } + } + + /* boundary checking */ + if (shared_timer->timer_in_use > shared_timer->threshold_high) + shared_timer->timer_in_use = shared_timer->threshold_high; + else if (shared_timer->timer_in_use < shared_timer->threshold_low) + shared_timer->timer_in_use = shared_timer->threshold_low; + + nesdev->currcq_count = 0; + + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); +} + + +/** + * nes_init_adapter - initialize adapter + */ +struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { + struct nes_adapter *nesadapter = NULL; + unsigned long num_pds; + u32 u32temp; + u32 port_count; + u16 max_rq_wrs; + u16 max_sq_wrs; + u32 max_mr; + u32 max_256pbl; + u32 max_4kpbl; + u32 max_qp; + u32 max_irrq; + u32 max_cq; + u32 hte_index_mask; + u32 adapter_size; + u32 arp_table_size; + u16 vendor_id; + u16 device_id; + u8 OneG_Mode; + u8 func_index; + + /* search the list of existing adapters */ + list_for_each_entry(nesadapter, &nes_adapter_list, list) { + nes_debug(NES_DBG_INIT, "Searching Adapter list for PCI devfn = 0x%X," + " adapter PCI slot/bus = %u/%u, pci devices PCI slot/bus = %u/%u, .\n", + nesdev->pcidev->devfn, + PCI_SLOT(nesadapter->devfn), + nesadapter->bus_number, + PCI_SLOT(nesdev->pcidev->devfn), + nesdev->pcidev->bus->number ); + if ((PCI_SLOT(nesadapter->devfn) == PCI_SLOT(nesdev->pcidev->devfn)) && + (nesadapter->bus_number == nesdev->pcidev->bus->number)) { + nesadapter->ref_count++; + return nesadapter; + } + } + + /* no adapter found */ + num_pds = pci_resource_len(nesdev->pcidev, BAR_1) >> PAGE_SHIFT; + if ((hw_rev != NE020_REV) && (hw_rev != NE020_REV1)) { + nes_debug(NES_DBG_INIT, "NE020 driver detected unknown hardware revision 0x%x\n", + hw_rev); + return NULL; + } + + nes_debug(NES_DBG_INIT, "Determine Soft Reset, QP_control=0x%x, CPU0=0x%x, CPU1=0x%x, CPU2=0x%x\n", + nes_read_indexed(nesdev, NES_IDX_QP_CONTROL + PCI_FUNC(nesdev->pcidev->devfn) * 8), + nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS), + nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS + 4), + nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS + 8)); + + nes_debug(NES_DBG_INIT, "Reset and init NE020\n"); + + + if ((port_count = nes_reset_adapter_ne020(nesdev, &OneG_Mode)) == 0) + return NULL; + + max_qp = nes_read_indexed(nesdev, NES_IDX_QP_CTX_SIZE); + nes_debug(NES_DBG_INIT, "QP_CTX_SIZE=%u\n", max_qp); + + u32temp = nes_read_indexed(nesdev, NES_IDX_QUAD_HASH_TABLE_SIZE); + if (max_qp > ((u32)1 << (u32temp & 0x001f))) { + nes_debug(NES_DBG_INIT, "Reducing Max QPs to %u due to hash table size = 0x%08X\n", + max_qp, u32temp); + max_qp = (u32)1 << (u32temp & 0x001f); + } + + hte_index_mask = ((u32)1 << ((u32temp & 0x001f)+1))-1; + nes_debug(NES_DBG_INIT, "Max QP = %u, hte_index_mask = 0x%08X.\n", + max_qp, hte_index_mask); + + u32temp = nes_read_indexed(nesdev, NES_IDX_IRRQ_COUNT); + + max_irrq = 1 << (u32temp & 0x001f); + + if (max_qp > max_irrq) { + max_qp = max_irrq; + nes_debug(NES_DBG_INIT, "Reducing Max QPs to %u due to Available Q1s.\n", + max_qp); + } + + /* there should be no reason to allocate more pds than qps */ + if (num_pds > max_qp) + num_pds = max_qp; + + u32temp = nes_read_indexed(nesdev, NES_IDX_MRT_SIZE); + max_mr = (u32)8192 << (u32temp & 0x7); + + u32temp = nes_read_indexed(nesdev, NES_IDX_PBL_REGION_SIZE); + max_256pbl = (u32)1 << (u32temp & 0x0000001f); + max_4kpbl = (u32)1 << ((u32temp >> 16) & 0x0000001f); + max_cq = nes_read_indexed(nesdev, NES_IDX_CQ_CTX_SIZE); + + u32temp = nes_read_indexed(nesdev, NES_IDX_ARP_CACHE_SIZE); + arp_table_size = 1 << u32temp; + + adapter_size = (sizeof(struct nes_adapter) + + (sizeof(unsigned long)-1)) & (~(sizeof(unsigned long)-1)); + adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp); + adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr); + adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_cq); + adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(num_pds); + adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(arp_table_size); + adapter_size += sizeof(struct nes_qp **) * max_qp; + + /* allocate a new adapter struct */ + nesadapter = kzalloc(adapter_size, GFP_KERNEL); + if (nesadapter == NULL) { + return NULL; + } + + nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n", + nesadapter, (u32)sizeof(struct nes_adapter), adapter_size); + + if (nes_read_eeprom_values(nesdev, nesadapter)) { + printk(KERN_ERR PFX "Unable to read EEPROM data.\n"); + kfree(nesadapter); + return NULL; + } + + nesadapter->vendor_id = (((u32) nesadapter->mac_addr_high) << 8) | + (nesadapter->mac_addr_low >> 24); + + pci_bus_read_config_word(nesdev->pcidev->bus, nesdev->pcidev->devfn, + PCI_DEVICE_ID, &device_id); + nesadapter->vendor_part_id = device_id; + + if (nes_init_serdes(nesdev, hw_rev, port_count, nesadapter, + OneG_Mode)) { + kfree(nesadapter); + return NULL; + } + nes_init_csr_ne020(nesdev, hw_rev, port_count); + + memset(nesadapter->pft_mcast_map, 255, + sizeof nesadapter->pft_mcast_map); + + /* populate the new nesadapter */ + nesadapter->devfn = nesdev->pcidev->devfn; + nesadapter->bus_number = nesdev->pcidev->bus->number; + nesadapter->ref_count = 1; + nesadapter->timer_int_req = 0xffff0000; + nesadapter->OneG_Mode = OneG_Mode; + nesadapter->doorbell_start = nesdev->doorbell_region; + + /* nesadapter->tick_delta = clk_divisor; */ + nesadapter->hw_rev = hw_rev; + nesadapter->port_count = port_count; + + nesadapter->max_qp = max_qp; + nesadapter->hte_index_mask = hte_index_mask; + nesadapter->max_irrq = max_irrq; + nesadapter->max_mr = max_mr; + nesadapter->max_256pbl = max_256pbl - 1; + nesadapter->max_4kpbl = max_4kpbl - 1; + nesadapter->max_cq = max_cq; + nesadapter->free_256pbl = max_256pbl - 1; + nesadapter->free_4kpbl = max_4kpbl - 1; + nesadapter->max_pd = num_pds; + nesadapter->arp_table_size = arp_table_size; + + nesadapter->et_pkt_rate_low = NES_TIMER_ENABLE_LIMIT; + if (nes_drv_opt & NES_DRV_OPT_DISABLE_INT_MOD) { + nesadapter->et_use_adaptive_rx_coalesce = 0; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT; + nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; + } else { + nesadapter->et_use_adaptive_rx_coalesce = 1; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC; + nesadapter->et_rx_coalesce_usecs_irq = 0; + printk(PFX "%s: Using Adaptive Interrupt Moderation\n", __func__); + } + /* Setup and enable the periodic timer */ + if (nesadapter->et_rx_coalesce_usecs_irq) + nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x80000000 | + ((u32)(nesadapter->et_rx_coalesce_usecs_irq * 8))); + else + nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x00000000); + + nesadapter->base_pd = 1; + + nesadapter->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | + IB_DEVICE_MEM_WINDOW | + IB_DEVICE_MEM_MGT_EXTENSIONS; + + nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter) + [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]); + nesadapter->allocated_cqs = &nesadapter->allocated_qps[BITS_TO_LONGS(max_qp)]; + nesadapter->allocated_mrs = &nesadapter->allocated_cqs[BITS_TO_LONGS(max_cq)]; + nesadapter->allocated_pds = &nesadapter->allocated_mrs[BITS_TO_LONGS(max_mr)]; + nesadapter->allocated_arps = &nesadapter->allocated_pds[BITS_TO_LONGS(num_pds)]; + nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]); + + + /* mark the usual suspect QPs, MR and CQs as in use */ + for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) { + set_bit(u32temp, nesadapter->allocated_qps); + set_bit(u32temp, nesadapter->allocated_cqs); + } + set_bit(0, nesadapter->allocated_mrs); + + for (u32temp = 0; u32temp < 20; u32temp++) + set_bit(u32temp, nesadapter->allocated_pds); + u32temp = nes_read_indexed(nesdev, NES_IDX_QP_MAX_CFG_SIZES); + + max_rq_wrs = ((u32temp >> 8) & 3); + switch (max_rq_wrs) { + case 0: + max_rq_wrs = 4; + break; + case 1: + max_rq_wrs = 16; + break; + case 2: + max_rq_wrs = 32; + break; + case 3: + max_rq_wrs = 512; + break; + } + + max_sq_wrs = (u32temp & 3); + switch (max_sq_wrs) { + case 0: + max_sq_wrs = 4; + break; + case 1: + max_sq_wrs = 16; + break; + case 2: + max_sq_wrs = 32; + break; + case 3: + max_sq_wrs = 512; + break; + } + nesadapter->max_qp_wr = min(max_rq_wrs, max_sq_wrs); + nesadapter->max_irrq_wr = (u32temp >> 16) & 3; + + nesadapter->max_sge = 4; + nesadapter->max_cqe = 32766; + + if (nes_read_eeprom_values(nesdev, nesadapter)) { + printk(KERN_ERR PFX "Unable to read EEPROM data.\n"); + kfree(nesadapter); + return NULL; + } + + u32temp = nes_read_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG); + nes_write_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG, + (u32temp & 0xff000000) | (nesadapter->tcp_timer_core_clk_divisor & 0x00ffffff)); + + /* setup port configuration */ + if (nesadapter->port_count == 1) { + nesadapter->log_port = 0x00000000; + if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) + nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000002); + else + nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003); + } else { + if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { + nesadapter->log_port = 0x000000D8; + } else { + if (nesadapter->port_count == 2) + nesadapter->log_port = 0x00000044; + else + nesadapter->log_port = 0x000000e4; + } + nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003); + } + + nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT, + nesadapter->log_port); + nes_debug(NES_DBG_INIT, "Probe time, LOG2PHY=%u\n", + nes_read_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT)); + + spin_lock_init(&nesadapter->resource_lock); + spin_lock_init(&nesadapter->phy_lock); + spin_lock_init(&nesadapter->pbl_lock); + spin_lock_init(&nesadapter->periodic_timer_lock); + + INIT_LIST_HEAD(&nesadapter->nesvnic_list[0]); + INIT_LIST_HEAD(&nesadapter->nesvnic_list[1]); + INIT_LIST_HEAD(&nesadapter->nesvnic_list[2]); + INIT_LIST_HEAD(&nesadapter->nesvnic_list[3]); + + if ((!nesadapter->OneG_Mode) && (nesadapter->port_count == 2)) { + u32 pcs_control_status0, pcs_control_status1; + u32 reset_value; + u32 i = 0; + u32 int_cnt = 0; + u32 ext_cnt = 0; + unsigned long flags; + u32 j = 0; + + pcs_control_status0 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0); + pcs_control_status1 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); + + for (i = 0; i < NES_MAX_LINK_CHECK; i++) { + pcs_control_status0 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0); + pcs_control_status1 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); + if ((0x0F000100 == (pcs_control_status0 & 0x0F000100)) + || (0x0F000100 == (pcs_control_status1 & 0x0F000100))) + int_cnt++; + msleep(1); + } + if (int_cnt > 1) { + spin_lock_irqsave(&nesadapter->phy_lock, flags); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8); + mh_detected++; + reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); + reset_value |= 0x0000003d; + nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value); + + while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) + & 0x00000040) != 0x00000040) && (j++ < 5000)); + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + + pcs_control_status0 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0); + pcs_control_status1 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); + + for (i = 0; i < NES_MAX_LINK_CHECK; i++) { + pcs_control_status0 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0); + pcs_control_status1 = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); + if ((0x0F000100 == (pcs_control_status0 & 0x0F000100)) + || (0x0F000100 == (pcs_control_status1 & 0x0F000100))) { + if (++ext_cnt > int_cnt) { + spin_lock_irqsave(&nesadapter->phy_lock, flags); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, + 0x0000F088); + mh_detected++; + reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); + reset_value |= 0x0000003d; + nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value); + + while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) + & 0x00000040) != 0x00000040) && (j++ < 5000)); + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + break; + } + } + msleep(1); + } + } + } + + if (nesadapter->hw_rev == NE020_REV) { + init_timer(&nesadapter->mh_timer); + nesadapter->mh_timer.function = nes_mh_fix; + nesadapter->mh_timer.expires = jiffies + (HZ/5); /* 1 second */ + nesadapter->mh_timer.data = (unsigned long)nesdev; + add_timer(&nesadapter->mh_timer); + } else { + nes_write32(nesdev->regs+NES_INTF_INT_STAT, 0x0f000000); + } + + init_timer(&nesadapter->lc_timer); + nesadapter->lc_timer.function = nes_clc; + nesadapter->lc_timer.expires = jiffies + 3600 * HZ; /* 1 hour */ + nesadapter->lc_timer.data = (unsigned long)nesdev; + add_timer(&nesadapter->lc_timer); + + list_add_tail(&nesadapter->list, &nes_adapter_list); + + for (func_index = 0; func_index < 8; func_index++) { + pci_bus_read_config_word(nesdev->pcidev->bus, + PCI_DEVFN(PCI_SLOT(nesdev->pcidev->devfn), + func_index), 0, &vendor_id); + if (vendor_id == 0xffff) + break; + } + nes_debug(NES_DBG_INIT, "%s %d functions found for %s.\n", __func__, + func_index, pci_name(nesdev->pcidev)); + nesadapter->adapter_fcn_count = func_index; + + return nesadapter; +} + + +/** + * nes_reset_adapter_ne020 + */ +static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode) +{ + u32 port_count; + u32 u32temp; + u32 i; + + u32temp = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); + port_count = ((u32temp & 0x00000300) >> 8) + 1; + /* TODO: assuming that both SERDES are set the same for now */ + *OneG_Mode = (u32temp & 0x00003c00) ? 0 : 1; + nes_debug(NES_DBG_INIT, "Initial Software Reset = 0x%08X, port_count=%u\n", + u32temp, port_count); + if (*OneG_Mode) + nes_debug(NES_DBG_INIT, "Running in 1G mode.\n"); + u32temp &= 0xff00ffc0; + switch (port_count) { + case 1: + u32temp |= 0x00ee0000; + break; + case 2: + u32temp |= 0x00cc0000; + break; + case 4: + u32temp |= 0x00000000; + break; + default: + return 0; + break; + } + + /* check and do full reset if needed */ + if (nes_read_indexed(nesdev, NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))) { + nes_debug(NES_DBG_INIT, "Issuing Full Soft reset = 0x%08X\n", u32temp | 0xd); + nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd); + + i = 0; + while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000) + mdelay(1); + if (i > 10000) { + nes_debug(NES_DBG_INIT, "Did not see full soft reset done.\n"); + return 0; + } + + i = 0; + while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000) + mdelay(1); + if (i > 10000) { + printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n", + nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS)); + return 0; + } + } + + /* port reset */ + switch (port_count) { + case 1: + u32temp |= 0x00ee0010; + break; + case 2: + u32temp |= 0x00cc0030; + break; + case 4: + u32temp |= 0x00000030; + break; + } + + nes_debug(NES_DBG_INIT, "Issuing Port Soft reset = 0x%08X\n", u32temp | 0xd); + nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd); + + i = 0; + while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000) + mdelay(1); + if (i > 10000) { + nes_debug(NES_DBG_INIT, "Did not see port soft reset done.\n"); + return 0; + } + + /* serdes 0 */ + i = 0; + while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0) + & 0x0000000f)) != 0x0000000f) && i++ < 5000) + mdelay(1); + if (i > 5000) { + nes_debug(NES_DBG_INIT, "Serdes 0 not ready, status=%x\n", u32temp); + return 0; + } + + /* serdes 1 */ + if (port_count > 1) { + i = 0; + while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1) + & 0x0000000f)) != 0x0000000f) && i++ < 5000) + mdelay(1); + if (i > 5000) { + nes_debug(NES_DBG_INIT, "Serdes 1 not ready, status=%x\n", u32temp); + return 0; + } + } + + return port_count; +} + + +/** + * nes_init_serdes + */ +static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, + struct nes_adapter *nesadapter, u8 OneG_Mode) +{ + int i; + u32 u32temp; + u32 sds; + + if (hw_rev != NE020_REV) { + /* init serdes 0 */ + switch (nesadapter->phy_type[0]) { + case NES_PHY_TYPE_CX4: + if (wide_ppm_offset) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA); + else + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); + break; + case NES_PHY_TYPE_KR: + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000); + break; + case NES_PHY_TYPE_PUMA_1G: + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); + sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0); + sds |= 0x00000100; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds); + break; + default: + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); + break; + } + + if (!OneG_Mode) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000); + + if (port_count < 2) + return 0; + + /* init serdes 1 */ + if (!(OneG_Mode && (nesadapter->phy_type[1] != NES_PHY_TYPE_PUMA_1G))) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF); + + switch (nesadapter->phy_type[1]) { + case NES_PHY_TYPE_ARGUS: + case NES_PHY_TYPE_SFP_D: + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000); + break; + case NES_PHY_TYPE_CX4: + if (wide_ppm_offset) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000FFFAA); + break; + case NES_PHY_TYPE_KR: + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000); + break; + case NES_PHY_TYPE_PUMA_1G: + sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); + sds |= 0x000000100; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds); + } + if (!OneG_Mode) { + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000); + sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); + sds &= 0xFFFFFFBF; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds); + } + } else { + /* init serdes 0 */ + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008); + i = 0; + while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0) + & 0x0000000f)) != 0x0000000f) && i++ < 5000) + mdelay(1); + if (i > 5000) { + nes_debug(NES_DBG_PHY, "Init: serdes 0 not ready, status=%x\n", u32temp); + return 1; + } + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE0, 0x0ff00000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET0, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS0, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0, 0x00000000); + if (OneG_Mode) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0182222); + else + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0042222); + + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff); + if (port_count > 1) { + /* init serdes 1 */ + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x00000048); + i = 0; + while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1) + & 0x0000000f)) != 0x0000000f) && (i++ < 5000)) + mdelay(1); + if (i > 5000) { + printk("%s: Init: serdes 1 not ready, status=%x\n", __func__, u32temp); + /* return 1; */ + } + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x000bdef7); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE1, 0x9ce73000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE1, 0x0ff00000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET1, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS1, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL1, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL1, 0xf0002222); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000ff); + } + } + return 0; +} + + +/** + * nes_init_csr_ne020 + * Initialize registers for ne020 hardware + */ +static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count) +{ + u32 u32temp; + + nes_debug(NES_DBG_INIT, "port_count=%d\n", port_count); + + nes_write_indexed(nesdev, 0x000001E4, 0x00000007); + /* nes_write_indexed(nesdev, 0x000001E8, 0x000208C4); */ + nes_write_indexed(nesdev, 0x000001E8, 0x00020874); + nes_write_indexed(nesdev, 0x000001D8, 0x00048002); + /* nes_write_indexed(nesdev, 0x000001D8, 0x0004B002); */ + nes_write_indexed(nesdev, 0x000001FC, 0x00050005); + nes_write_indexed(nesdev, 0x00000600, 0x55555555); + nes_write_indexed(nesdev, 0x00000604, 0x55555555); + + /* TODO: move these MAC register settings to NIC bringup */ + nes_write_indexed(nesdev, 0x00002000, 0x00000001); + nes_write_indexed(nesdev, 0x00002004, 0x00000001); + nes_write_indexed(nesdev, 0x00002008, 0x0000FFFF); + nes_write_indexed(nesdev, 0x0000200C, 0x00000001); + nes_write_indexed(nesdev, 0x00002010, 0x000003c1); + nes_write_indexed(nesdev, 0x0000201C, 0x75345678); + if (port_count > 1) { + nes_write_indexed(nesdev, 0x00002200, 0x00000001); + nes_write_indexed(nesdev, 0x00002204, 0x00000001); + nes_write_indexed(nesdev, 0x00002208, 0x0000FFFF); + nes_write_indexed(nesdev, 0x0000220C, 0x00000001); + nes_write_indexed(nesdev, 0x00002210, 0x000003c1); + nes_write_indexed(nesdev, 0x0000221C, 0x75345678); + nes_write_indexed(nesdev, 0x00000908, 0x20000001); + } + if (port_count > 2) { + nes_write_indexed(nesdev, 0x00002400, 0x00000001); + nes_write_indexed(nesdev, 0x00002404, 0x00000001); + nes_write_indexed(nesdev, 0x00002408, 0x0000FFFF); + nes_write_indexed(nesdev, 0x0000240C, 0x00000001); + nes_write_indexed(nesdev, 0x00002410, 0x000003c1); + nes_write_indexed(nesdev, 0x0000241C, 0x75345678); + nes_write_indexed(nesdev, 0x00000910, 0x20000001); + + nes_write_indexed(nesdev, 0x00002600, 0x00000001); + nes_write_indexed(nesdev, 0x00002604, 0x00000001); + nes_write_indexed(nesdev, 0x00002608, 0x0000FFFF); + nes_write_indexed(nesdev, 0x0000260C, 0x00000001); + nes_write_indexed(nesdev, 0x00002610, 0x000003c1); + nes_write_indexed(nesdev, 0x0000261C, 0x75345678); + nes_write_indexed(nesdev, 0x00000918, 0x20000001); + } + + nes_write_indexed(nesdev, 0x00005000, 0x00018000); + /* nes_write_indexed(nesdev, 0x00005000, 0x00010000); */ + nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1, (wqm_quanta << 1) | + 0x00000001); + nes_write_indexed(nesdev, 0x00005008, 0x1F1F1F1F); + nes_write_indexed(nesdev, 0x00005010, 0x1F1F1F1F); + nes_write_indexed(nesdev, 0x00005018, 0x1F1F1F1F); + nes_write_indexed(nesdev, 0x00005020, 0x1F1F1F1F); + nes_write_indexed(nesdev, 0x00006090, 0xFFFFFFFF); + + /* TODO: move this to code, get from EEPROM */ + nes_write_indexed(nesdev, 0x00000900, 0x20000001); + nes_write_indexed(nesdev, 0x000060C0, 0x0000028e); + nes_write_indexed(nesdev, 0x000060C8, 0x00000020); + + nes_write_indexed(nesdev, 0x000001EC, 0x7b2625a0); + /* nes_write_indexed(nesdev, 0x000001EC, 0x5f2625a0); */ + + if (hw_rev != NE020_REV) { + u32temp = nes_read_indexed(nesdev, 0x000008e8); + u32temp |= 0x80000000; + nes_write_indexed(nesdev, 0x000008e8, u32temp); + u32temp = nes_read_indexed(nesdev, 0x000021f8); + u32temp &= 0x7fffffff; + u32temp |= 0x7fff0010; + nes_write_indexed(nesdev, 0x000021f8, u32temp); + if (port_count > 1) { + u32temp = nes_read_indexed(nesdev, 0x000023f8); + u32temp &= 0x7fffffff; + u32temp |= 0x7fff0010; + nes_write_indexed(nesdev, 0x000023f8, u32temp); + } + } +} + + +/** + * nes_destroy_adapter - destroy the adapter structure + */ +void nes_destroy_adapter(struct nes_adapter *nesadapter) +{ + struct nes_adapter *tmp_adapter; + + list_for_each_entry(tmp_adapter, &nes_adapter_list, list) { + nes_debug(NES_DBG_SHUTDOWN, "Nes Adapter list entry = 0x%p.\n", + tmp_adapter); + } + + nesadapter->ref_count--; + if (!nesadapter->ref_count) { + if (nesadapter->hw_rev == NE020_REV) { + del_timer(&nesadapter->mh_timer); + } + del_timer(&nesadapter->lc_timer); + + list_del(&nesadapter->list); + kfree(nesadapter); + } +} + + +/** + * nes_init_cqp + */ +int nes_init_cqp(struct nes_device *nesdev) +{ + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_cqp_qp_context *cqp_qp_context; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_hw_ceq *ceq; + struct nes_hw_ceq *nic_ceq; + struct nes_hw_aeq *aeq; + void *vmem; + dma_addr_t pmem; + u32 count=0; + u32 cqp_head; + u64 u64temp; + u32 u32temp; + + /* allocate CQP memory */ + /* Need to add max_cq to the aeq size once cq overflow checking is added back */ + /* SQ is 512 byte aligned, others are 256 byte aligned */ + nesdev->cqp_mem_size = 512 + + (sizeof(struct nes_hw_cqp_wqe) * NES_CQP_SQ_SIZE) + + (sizeof(struct nes_hw_cqe) * NES_CCQ_SIZE) + + max(((u32)sizeof(struct nes_hw_ceqe) * NES_CCEQ_SIZE), (u32)256) + + max(((u32)sizeof(struct nes_hw_ceqe) * NES_NIC_CEQ_SIZE), (u32)256) + + (sizeof(struct nes_hw_aeqe) * nesadapter->max_qp) + + sizeof(struct nes_hw_cqp_qp_context); + + nesdev->cqp_vbase = pci_alloc_consistent(nesdev->pcidev, nesdev->cqp_mem_size, + &nesdev->cqp_pbase); + if (!nesdev->cqp_vbase) { + nes_debug(NES_DBG_INIT, "Unable to allocate memory for host descriptor rings\n"); + return -ENOMEM; + } + memset(nesdev->cqp_vbase, 0, nesdev->cqp_mem_size); + + /* Allocate a twice the number of CQP requests as the SQ size */ + nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) * + 2 * NES_CQP_SQ_SIZE, GFP_KERNEL); + if (nesdev->nes_cqp_requests == NULL) { + nes_debug(NES_DBG_INIT, "Unable to allocate memory CQP request entries.\n"); + pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase, + nesdev->cqp.sq_pbase); + return -ENOMEM; + } + + nes_debug(NES_DBG_INIT, "Allocated CQP structures at %p (phys = %016lX), size = %u.\n", + nesdev->cqp_vbase, (unsigned long)nesdev->cqp_pbase, nesdev->cqp_mem_size); + + spin_lock_init(&nesdev->cqp.lock); + init_waitqueue_head(&nesdev->cqp.waitq); + + /* Setup Various Structures */ + vmem = (void *)(((unsigned long)nesdev->cqp_vbase + (512 - 1)) & + ~(unsigned long)(512 - 1)); + pmem = (dma_addr_t)(((unsigned long long)nesdev->cqp_pbase + (512 - 1)) & + ~(unsigned long long)(512 - 1)); + + nesdev->cqp.sq_vbase = vmem; + nesdev->cqp.sq_pbase = pmem; + nesdev->cqp.sq_size = NES_CQP_SQ_SIZE; + nesdev->cqp.sq_head = 0; + nesdev->cqp.sq_tail = 0; + nesdev->cqp.qp_id = PCI_FUNC(nesdev->pcidev->devfn); + + vmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size); + pmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size); + + nesdev->ccq.cq_vbase = vmem; + nesdev->ccq.cq_pbase = pmem; + nesdev->ccq.cq_size = NES_CCQ_SIZE; + nesdev->ccq.cq_head = 0; + nesdev->ccq.ce_handler = nes_cqp_ce_handler; + nesdev->ccq.cq_number = PCI_FUNC(nesdev->pcidev->devfn); + + vmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size); + pmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size); + + nesdev->ceq_index = PCI_FUNC(nesdev->pcidev->devfn); + ceq = &nesadapter->ceq[nesdev->ceq_index]; + ceq->ceq_vbase = vmem; + ceq->ceq_pbase = pmem; + ceq->ceq_size = NES_CCEQ_SIZE; + ceq->ceq_head = 0; + + vmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256); + pmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256); + + nesdev->nic_ceq_index = PCI_FUNC(nesdev->pcidev->devfn) + 8; + nic_ceq = &nesadapter->ceq[nesdev->nic_ceq_index]; + nic_ceq->ceq_vbase = vmem; + nic_ceq->ceq_pbase = pmem; + nic_ceq->ceq_size = NES_NIC_CEQ_SIZE; + nic_ceq->ceq_head = 0; + + vmem += max(((u32)sizeof(struct nes_hw_ceqe) * nic_ceq->ceq_size), (u32)256); + pmem += max(((u32)sizeof(struct nes_hw_ceqe) * nic_ceq->ceq_size), (u32)256); + + aeq = &nesadapter->aeq[PCI_FUNC(nesdev->pcidev->devfn)]; + aeq->aeq_vbase = vmem; + aeq->aeq_pbase = pmem; + aeq->aeq_size = nesadapter->max_qp; + aeq->aeq_head = 0; + + /* Setup QP Context */ + vmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size); + pmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size); + + cqp_qp_context = vmem; + cqp_qp_context->context_words[0] = + cpu_to_le32((PCI_FUNC(nesdev->pcidev->devfn) << 12) + (2 << 10)); + cqp_qp_context->context_words[1] = 0; + cqp_qp_context->context_words[2] = cpu_to_le32((u32)nesdev->cqp.sq_pbase); + cqp_qp_context->context_words[3] = cpu_to_le32(((u64)nesdev->cqp.sq_pbase) >> 32); + + + /* Write the address to Create CQP */ + if ((sizeof(dma_addr_t) > 4)) { + nes_write_indexed(nesdev, + NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8), + ((u64)pmem) >> 32); + } else { + nes_write_indexed(nesdev, + NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8), 0); + } + nes_write_indexed(nesdev, + NES_IDX_CREATE_CQP_LOW + (PCI_FUNC(nesdev->pcidev->devfn) * 8), + (u32)pmem); + + INIT_LIST_HEAD(&nesdev->cqp_avail_reqs); + INIT_LIST_HEAD(&nesdev->cqp_pending_reqs); + + for (count = 0; count < 2*NES_CQP_SQ_SIZE; count++) { + init_waitqueue_head(&nesdev->nes_cqp_requests[count].waitq); + list_add_tail(&nesdev->nes_cqp_requests[count].list, &nesdev->cqp_avail_reqs); + } + + /* Write Create CCQ WQE */ + cqp_head = nesdev->cqp.sq_head++; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | + NES_CQP_CQ_CHK_OVERFLOW | ((u32)nesdev->ccq.cq_size << 16))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + (nesdev->ccq.cq_number | + ((u32)nesdev->ceq_index << 16))); + u64temp = (u64)nesdev->ccq.cq_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; + u64temp = (unsigned long)&nesdev->ccq; + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = + cpu_to_le32((u32)(u64temp >> 1)); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = + cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0; + + /* Write Create CEQ WQE */ + cqp_head = nesdev->cqp.sq_head++; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_CREATE_CEQ + ((u32)nesdev->ceq_index << 8))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX, ceq->ceq_size); + u64temp = (u64)ceq->ceq_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); + + /* Write Create AEQ WQE */ + cqp_head = nesdev->cqp.sq_head++; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_CREATE_AEQ + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_AEQ_WQE_ELEMENT_COUNT_IDX, aeq->aeq_size); + u64temp = (u64)aeq->aeq_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); + + /* Write Create NIC CEQ WQE */ + cqp_head = nesdev->cqp.sq_head++; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_CREATE_CEQ + ((u32)nesdev->nic_ceq_index << 8))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX, nic_ceq->ceq_size); + u64temp = (u64)nic_ceq->ceq_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); + + /* Poll until CCQP done */ + count = 0; + do { + if (count++ > 1000) { + printk(KERN_ERR PFX "Error creating CQP\n"); + pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, + nesdev->cqp_vbase, nesdev->cqp_pbase); + return -1; + } + udelay(10); + } while (!(nes_read_indexed(nesdev, + NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn) * 8)) & (1 << 8))); + + nes_debug(NES_DBG_INIT, "CQP Status = 0x%08X\n", nes_read_indexed(nesdev, + NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))); + + u32temp = 0x04800000; + nes_write32(nesdev->regs+NES_WQE_ALLOC, u32temp | nesdev->cqp.qp_id); + + /* wait for the CCQ, CEQ, and AEQ to get created */ + count = 0; + do { + if (count++ > 1000) { + printk(KERN_ERR PFX "Error creating CCQ, CEQ, and AEQ\n"); + pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, + nesdev->cqp_vbase, nesdev->cqp_pbase); + return -1; + } + udelay(10); + } while (((nes_read_indexed(nesdev, + NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15<<8)) != (15<<8))); + + /* dump the QP status value */ + nes_debug(NES_DBG_INIT, "QP Status = 0x%08X\n", nes_read_indexed(nesdev, + NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))); + + nesdev->cqp.sq_tail++; + + return 0; +} + + +/** + * nes_destroy_cqp + */ +int nes_destroy_cqp(struct nes_device *nesdev) +{ + struct nes_hw_cqp_wqe *cqp_wqe; + u32 count = 0; + u32 cqp_head; + unsigned long flags; + + do { + if (count++ > 1000) + break; + udelay(10); + } while (!(nesdev->cqp.sq_head == nesdev->cqp.sq_tail)); + + /* Reset CCQ */ + nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_RESET | + nesdev->ccq.cq_number); + + /* Disable device interrupts */ + nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff); + + spin_lock_irqsave(&nesdev->cqp.lock, flags); + + /* Destroy the AEQ */ + cqp_head = nesdev->cqp.sq_head++; + nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_AEQ | + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0; + + /* Destroy the NIC CEQ */ + cqp_head = nesdev->cqp.sq_head++; + nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ | + ((u32)nesdev->nic_ceq_index << 8)); + + /* Destroy the CEQ */ + cqp_head = nesdev->cqp.sq_head++; + nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ | + (nesdev->ceq_index << 8)); + + /* Destroy the CCQ */ + cqp_head = nesdev->cqp.sq_head++; + nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CQ); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->ccq.cq_number | + ((u32)nesdev->ceq_index << 16)); + + /* Destroy CQP */ + cqp_head = nesdev->cqp.sq_head++; + nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_QP | + NES_CQP_QP_TYPE_CQP); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->cqp.qp_id); + + barrier(); + /* Ring doorbell (5 WQEs) */ + nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x05800000 | nesdev->cqp.qp_id); + + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + + /* wait for the CCQ, CEQ, and AEQ to get destroyed */ + count = 0; + do { + if (count++ > 1000) { + printk(KERN_ERR PFX "Function%d: Error destroying CCQ, CEQ, and AEQ\n", + PCI_FUNC(nesdev->pcidev->devfn)); + break; + } + udelay(10); + } while (((nes_read_indexed(nesdev, + NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15 << 8)) != 0)); + + /* dump the QP status value */ + nes_debug(NES_DBG_SHUTDOWN, "Function%d: QP Status = 0x%08X\n", + PCI_FUNC(nesdev->pcidev->devfn), + nes_read_indexed(nesdev, + NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))); + + kfree(nesdev->nes_cqp_requests); + + /* Free the control structures */ + pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase, + nesdev->cqp.sq_pbase); + + return 0; +} + + +/** + * nes_init_1g_phy + */ +static int nes_init_1g_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index) +{ + u32 counter = 0; + u16 phy_data; + int ret = 0; + + nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000); + + /* Reset the PHY */ + nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000); + udelay(100); + counter = 0; + do { + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); + if (counter++ > 100) { + ret = -1; + break; + } + } while (phy_data & 0x8000); + + /* Setting no phy loopback */ + phy_data &= 0xbfff; + phy_data |= 0x1140; + nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data); + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); + nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data); + nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data); + + /* Setting the interrupt mask */ + nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee); + nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); + + /* turning on flow control */ + nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00); + nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data); + + /* Clear Half duplex */ + nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100)); + nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); + + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300); + + return ret; +} + + +/** + * nes_init_2025_phy + */ +static int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index) +{ + u32 temp_phy_data = 0; + u32 temp_phy_data2 = 0; + u32 counter = 0; + u32 sds; + u32 mac_index = nesdev->mac_index; + int ret = 0; + unsigned int first_attempt = 1; + + /* Check firmware heartbeat */ + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + udelay(1500); + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); + temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + + if (temp_phy_data != temp_phy_data2) { + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if ((temp_phy_data & 0xff) > 0x20) + return 0; + printk(PFX "Reinitialize external PHY\n"); + } + + /* no heartbeat, configure the PHY */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); + + switch (phy_type) { + case NES_PHY_TYPE_ARGUS: + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00); + + /* setup LEDs */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009); + break; + + case NES_PHY_TYPE_SFP_D: + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00); + + /* setup LEDs */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009); + break; + + case NES_PHY_TYPE_KR: + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0010); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0080); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00); + + /* setup LEDs */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x000B); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x0003); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0004); + + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0022, 0x406D); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0023, 0x0020); + break; + } + + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528); + + /* Bring PHY out of reset */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002); + + /* Check for heartbeat */ + counter = 0; + mdelay(690); + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + do { + if (counter++ > 150) { + printk(PFX "No PHY heartbeat\n"); + break; + } + mdelay(1); + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); + temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + } while ((temp_phy_data2 == temp_phy_data)); + + /* wait for tracking */ + counter = 0; + do { + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if (counter++ > 300) { + if (((temp_phy_data & 0xff) == 0x0) && first_attempt) { + first_attempt = 0; + counter = 0; + /* reset AMCC PHY and try again */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040); + continue; + } else { + ret = 1; + break; + } + } + mdelay(10); + } while ((temp_phy_data & 0xff) < 0x30); + + /* setup signal integrity */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032); + if (phy_type == NES_PHY_TYPE_KR) { + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x000C); + } else { + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x0002); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc314, 0x0063); + } + + /* reset serdes */ + sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200); + sds |= 0x1; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds); + sds &= 0xfffffffe; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds); + + counter = 0; + while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040) + && (counter++ < 5000)) + ; + + return ret; +} + + +/** + * nes_init_phy + */ +int nes_init_phy(struct nes_device *nesdev) +{ + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 mac_index = nesdev->mac_index; + u32 tx_config = 0; + unsigned long flags; + u8 phy_type = nesadapter->phy_type[mac_index]; + u8 phy_index = nesadapter->phy_index[mac_index]; + int ret = 0; + + tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); + if (phy_type == NES_PHY_TYPE_1G) { + /* setup 1G MDIO operation */ + tx_config &= 0xFFFFFFE3; + tx_config |= 0x04; + } else { + /* setup 10G MDIO operation */ + tx_config &= 0xFFFFFFE3; + tx_config |= 0x1D; + } + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); + + spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); + + switch (phy_type) { + case NES_PHY_TYPE_1G: + ret = nes_init_1g_phy(nesdev, phy_type, phy_index); + break; + case NES_PHY_TYPE_ARGUS: + case NES_PHY_TYPE_SFP_D: + case NES_PHY_TYPE_KR: + ret = nes_init_2025_phy(nesdev, phy_type, phy_index); + break; + } + + spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); + + return ret; +} + + +/** + * nes_replenish_nic_rq + */ +static void nes_replenish_nic_rq(struct nes_vnic *nesvnic) +{ + unsigned long flags; + dma_addr_t bus_address; + struct sk_buff *skb; + struct nes_hw_nic_rq_wqe *nic_rqe; + struct nes_hw_nic *nesnic; + struct nes_device *nesdev; + struct nes_rskb_cb *cb; + u32 rx_wqes_posted = 0; + + nesnic = &nesvnic->nic; + nesdev = nesvnic->nesdev; + spin_lock_irqsave(&nesnic->rq_lock, flags); + if (nesnic->replenishing_rq !=0) { + if (((nesnic->rq_size-1) == atomic_read(&nesvnic->rx_skbs_needed)) && + (atomic_read(&nesvnic->rx_skb_timer_running) == 0)) { + atomic_set(&nesvnic->rx_skb_timer_running, 1); + spin_unlock_irqrestore(&nesnic->rq_lock, flags); + nesvnic->rq_wqes_timer.expires = jiffies + (HZ/2); /* 1/2 second */ + add_timer(&nesvnic->rq_wqes_timer); + } else + spin_unlock_irqrestore(&nesnic->rq_lock, flags); + return; + } + nesnic->replenishing_rq = 1; + spin_unlock_irqrestore(&nesnic->rq_lock, flags); + do { + skb = dev_alloc_skb(nesvnic->max_frame_size); + if (skb) { + skb->dev = nesvnic->netdev; + + bus_address = pci_map_single(nesdev->pcidev, + skb->data, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->busaddr = bus_address; + cb->maplen = nesvnic->max_frame_size; + + nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_head]; + nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = + cpu_to_le32(nesvnic->max_frame_size); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = + cpu_to_le32((u32)bus_address); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = + cpu_to_le32((u32)((u64)bus_address >> 32)); + nesnic->rx_skb[nesnic->rq_head] = skb; + nesnic->rq_head++; + nesnic->rq_head &= nesnic->rq_size - 1; + atomic_dec(&nesvnic->rx_skbs_needed); + barrier(); + if (++rx_wqes_posted == 255) { + nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id); + rx_wqes_posted = 0; + } + } else { + spin_lock_irqsave(&nesnic->rq_lock, flags); + if (((nesnic->rq_size-1) == atomic_read(&nesvnic->rx_skbs_needed)) && + (atomic_read(&nesvnic->rx_skb_timer_running) == 0)) { + atomic_set(&nesvnic->rx_skb_timer_running, 1); + spin_unlock_irqrestore(&nesnic->rq_lock, flags); + nesvnic->rq_wqes_timer.expires = jiffies + (HZ/2); /* 1/2 second */ + add_timer(&nesvnic->rq_wqes_timer); + } else + spin_unlock_irqrestore(&nesnic->rq_lock, flags); + break; + } + } while (atomic_read(&nesvnic->rx_skbs_needed)); + barrier(); + if (rx_wqes_posted) + nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id); + nesnic->replenishing_rq = 0; +} + + +/** + * nes_rq_wqes_timeout + */ +static void nes_rq_wqes_timeout(unsigned long parm) +{ + struct nes_vnic *nesvnic = (struct nes_vnic *)parm; + printk("%s: Timer fired.\n", __func__); + atomic_set(&nesvnic->rx_skb_timer_running, 0); + if (atomic_read(&nesvnic->rx_skbs_needed)) + nes_replenish_nic_rq(nesvnic); +} + + +static int nes_lro_get_skb_hdr(struct sk_buff *skb, void **iphdr, + void **tcph, u64 *hdr_flags, void *priv) +{ + unsigned int ip_len; + struct iphdr *iph; + skb_reset_network_header(skb); + iph = ip_hdr(skb); + if (iph->protocol != IPPROTO_TCP) + return -1; + ip_len = ip_hdrlen(skb); + skb_set_transport_header(skb, ip_len); + *tcph = tcp_hdr(skb); + + *hdr_flags = LRO_IPV4 | LRO_TCP; + *iphdr = iph; + return 0; +} + + +/** + * nes_init_nic_qp + */ +int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) +{ + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_hw_nic_sq_wqe *nic_sqe; + struct nes_hw_nic_qp_context *nic_context; + struct sk_buff *skb; + struct nes_hw_nic_rq_wqe *nic_rqe; + struct nes_vnic *nesvnic = netdev_priv(netdev); + unsigned long flags; + void *vmem; + dma_addr_t pmem; + u64 u64temp; + int ret; + u32 cqp_head; + u32 counter; + u32 wqe_count; + struct nes_rskb_cb *cb; + u8 jumbomode=0; + + /* Allocate fragment, SQ, RQ, and CQ; Reuse CEQ based on the PCI function */ + nesvnic->nic_mem_size = 256 + + (NES_NIC_WQ_SIZE * sizeof(struct nes_first_frag)) + + (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe)) + + (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe)) + + (NES_NIC_WQ_SIZE * 2 * sizeof(struct nes_hw_nic_cqe)) + + sizeof(struct nes_hw_nic_qp_context); + + nesvnic->nic_vbase = pci_alloc_consistent(nesdev->pcidev, nesvnic->nic_mem_size, + &nesvnic->nic_pbase); + if (!nesvnic->nic_vbase) { + nes_debug(NES_DBG_INIT, "Unable to allocate memory for NIC host descriptor rings\n"); + return -ENOMEM; + } + memset(nesvnic->nic_vbase, 0, nesvnic->nic_mem_size); + nes_debug(NES_DBG_INIT, "Allocated NIC QP structures at %p (phys = %016lX), size = %u.\n", + nesvnic->nic_vbase, (unsigned long)nesvnic->nic_pbase, nesvnic->nic_mem_size); + + vmem = (void *)(((unsigned long)nesvnic->nic_vbase + (256 - 1)) & + ~(unsigned long)(256 - 1)); + pmem = (dma_addr_t)(((unsigned long long)nesvnic->nic_pbase + (256 - 1)) & + ~(unsigned long long)(256 - 1)); + + /* Setup the first Fragment buffers */ + nesvnic->nic.first_frag_vbase = vmem; + + for (counter = 0; counter < NES_NIC_WQ_SIZE; counter++) { + nesvnic->nic.frag_paddr[counter] = pmem; + pmem += sizeof(struct nes_first_frag); + } + + /* setup the SQ */ + vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_first_frag)); + + nesvnic->nic.sq_vbase = (void *)vmem; + nesvnic->nic.sq_pbase = pmem; + nesvnic->nic.sq_head = 0; + nesvnic->nic.sq_tail = 0; + nesvnic->nic.sq_size = NES_NIC_WQ_SIZE; + for (counter = 0; counter < NES_NIC_WQ_SIZE; counter++) { + nic_sqe = &nesvnic->nic.sq_vbase[counter]; + nic_sqe->wqe_words[NES_NIC_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_NIC_SQ_WQE_DISABLE_CHKSUM | + NES_NIC_SQ_WQE_COMPLETION); + nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX] = + cpu_to_le32((u32)NES_FIRST_FRAG_SIZE << 16); + nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX] = + cpu_to_le32((u32)nesvnic->nic.frag_paddr[counter]); + nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX] = + cpu_to_le32((u32)((u64)nesvnic->nic.frag_paddr[counter] >> 32)); + } + + nesvnic->get_cqp_request = nes_get_cqp_request; + nesvnic->post_cqp_request = nes_post_cqp_request; + nesvnic->mcrq_mcast_filter = NULL; + + spin_lock_init(&nesvnic->nic.rq_lock); + + /* setup the RQ */ + vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe)); + pmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe)); + + + nesvnic->nic.rq_vbase = vmem; + nesvnic->nic.rq_pbase = pmem; + nesvnic->nic.rq_head = 0; + nesvnic->nic.rq_tail = 0; + nesvnic->nic.rq_size = NES_NIC_WQ_SIZE; + + /* setup the CQ */ + vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe)); + pmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe)); + + if (nesdev->nesadapter->netdev_count > 2) + nesvnic->mcrq_qp_id = nesvnic->nic_index + 32; + else + nesvnic->mcrq_qp_id = nesvnic->nic.qp_id + 4; + + nesvnic->nic_cq.cq_vbase = vmem; + nesvnic->nic_cq.cq_pbase = pmem; + nesvnic->nic_cq.cq_head = 0; + nesvnic->nic_cq.cq_size = NES_NIC_WQ_SIZE * 2; + + nesvnic->nic_cq.ce_handler = nes_nic_napi_ce_handler; + + /* Send CreateCQ request to CQP */ + spin_lock_irqsave(&nesdev->cqp.lock, flags); + cqp_head = nesdev->cqp.sq_head; + + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( + NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | + ((u32)nesvnic->nic_cq.cq_size << 16)); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32( + nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16)); + u64temp = (u64)nesvnic->nic_cq.cq_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; + u64temp = (unsigned long)&nesvnic->nic_cq; + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1)); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = + cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0; + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + /* Send CreateQP request to CQP */ + nic_context = (void *)(&nesvnic->nic_cq.cq_vbase[nesvnic->nic_cq.cq_size]); + nic_context->context_words[NES_NIC_CTX_MISC_IDX] = + cpu_to_le32((u32)NES_NIC_CTX_SIZE | + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12)); + nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n", + nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE), + nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE)); + if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0) { + nic_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE); + } + + u64temp = (u64)nesvnic->nic.sq_pbase; + nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp); + nic_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); + u64temp = (u64)nesvnic->nic.rq_pbase; + nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp); + nic_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP | + NES_CQP_QP_TYPE_NIC); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesvnic->nic.qp_id); + u64temp = (u64)nesvnic->nic_cq.cq_pbase + + (nesvnic->nic_cq.cq_size * sizeof(struct nes_hw_nic_cqe)); + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + nesdev->cqp.sq_head = cqp_head; + + barrier(); + + /* Ring doorbell (2 WQEs) */ + nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id); + + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + nes_debug(NES_DBG_INIT, "Waiting for create NIC QP%u to complete.\n", + nesvnic->nic.qp_id); + + ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_INIT, "Create NIC QP%u completed, wait_event_timeout ret = %u.\n", + nesvnic->nic.qp_id, ret); + if (!ret) { + nes_debug(NES_DBG_INIT, "NIC QP%u create timeout expired\n", nesvnic->nic.qp_id); + pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase, + nesvnic->nic_pbase); + return -EIO; + } + + /* Populate the RQ */ + for (counter = 0; counter < (NES_NIC_WQ_SIZE - 1); counter++) { + skb = dev_alloc_skb(nesvnic->max_frame_size); + if (!skb) { + nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name); + + nes_destroy_nic_qp(nesvnic); + return -ENOMEM; + } + + skb->dev = netdev; + + pmem = pci_map_single(nesdev->pcidev, skb->data, + nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->busaddr = pmem; + cb->maplen = nesvnic->max_frame_size; + + nic_rqe = &nesvnic->nic.rq_vbase[counter]; + nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32)); + nesvnic->nic.rx_skb[counter] = skb; + } + + wqe_count = NES_NIC_WQ_SIZE - 1; + nesvnic->nic.rq_head = wqe_count; + barrier(); + do { + counter = min(wqe_count, ((u32)255)); + wqe_count -= counter; + nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter << 24) | nesvnic->nic.qp_id); + } while (wqe_count); + init_timer(&nesvnic->rq_wqes_timer); + nesvnic->rq_wqes_timer.function = nes_rq_wqes_timeout; + nesvnic->rq_wqes_timer.data = (unsigned long)nesvnic; + nes_debug(NES_DBG_INIT, "NAPI support Enabled\n"); + if (nesdev->nesadapter->et_use_adaptive_rx_coalesce) + { + nes_nic_init_timer(nesdev); + if (netdev->mtu > 1500) + jumbomode = 1; + nes_nic_init_timer_defaults(nesdev, jumbomode); + } + if ((nesdev->nesadapter->allow_unaligned_fpdus) && + (nes_init_mgt_qp(nesdev, netdev, nesvnic))) { + nes_debug(NES_DBG_INIT, "%s: Out of memory for pau nic\n", netdev->name); + nes_destroy_nic_qp(nesvnic); + return -ENOMEM; + } + + nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr; + nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS; + nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc; + nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr; + nesvnic->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID; + nesvnic->lro_mgr.dev = netdev; + nesvnic->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY; + nesvnic->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; + return 0; +} + + +/** + * nes_destroy_nic_qp + */ +void nes_destroy_nic_qp(struct nes_vnic *nesvnic) +{ + u64 u64temp; + dma_addr_t bus_address; + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_hw_nic_sq_wqe *nic_sqe; + __le16 *wqe_fragment_length; + u16 wqe_fragment_index; + u32 cqp_head; + u32 wqm_cfg0; + unsigned long flags; + struct sk_buff *rx_skb; + struct nes_rskb_cb *cb; + int ret; + + if (nesdev->nesadapter->allow_unaligned_fpdus) + nes_destroy_mgt(nesvnic); + + /* clear wqe stall before destroying NIC QP */ + wqm_cfg0 = nes_read_indexed(nesdev, NES_IDX_WQM_CONFIG0); + nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0 & 0xFFFF7FFF); + + /* Free remaining NIC receive buffers */ + while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) { + rx_skb = nesvnic->nic.rx_skb[nesvnic->nic.rq_tail]; + cb = (struct nes_rskb_cb *)&rx_skb->cb[0]; + pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, + PCI_DMA_FROMDEVICE); + + dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]); + nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1); + } + + /* Free remaining NIC transmit buffers */ + while (nesvnic->nic.sq_head != nesvnic->nic.sq_tail) { + nic_sqe = &nesvnic->nic.sq_vbase[nesvnic->nic.sq_tail]; + wqe_fragment_index = 1; + wqe_fragment_length = (__le16 *) + &nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; + /* bump past the vlan tag */ + wqe_fragment_length++; + if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) { + u64temp = (u64)le32_to_cpu( + nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+ + wqe_fragment_index*2]); + u64temp += ((u64)le32_to_cpu( + nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX + + wqe_fragment_index*2]))<<32; + bus_address = (dma_addr_t)u64temp; + if (test_and_clear_bit(nesvnic->nic.sq_tail, + nesvnic->nic.first_frag_overflow)) { + pci_unmap_single(nesdev->pcidev, + bus_address, + le16_to_cpu(wqe_fragment_length[ + wqe_fragment_index++]), + PCI_DMA_TODEVICE); + } + for (; wqe_fragment_index < 5; wqe_fragment_index++) { + if (wqe_fragment_length[wqe_fragment_index]) { + u64temp = le32_to_cpu( + nic_sqe->wqe_words[ + NES_NIC_SQ_WQE_FRAG0_LOW_IDX+ + wqe_fragment_index*2]); + u64temp += ((u64)le32_to_cpu( + nic_sqe->wqe_words[ + NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+ + wqe_fragment_index*2]))<<32; + bus_address = (dma_addr_t)u64temp; + pci_unmap_page(nesdev->pcidev, + bus_address, + le16_to_cpu( + wqe_fragment_length[ + wqe_fragment_index]), + PCI_DMA_TODEVICE); + } else + break; + } + } + if (nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]) + dev_kfree_skb( + nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]); + + nesvnic->nic.sq_tail = (nesvnic->nic.sq_tail + 1) + & (nesvnic->nic.sq_size - 1); + } + + spin_lock_irqsave(&nesdev->cqp.lock, flags); + + /* Destroy NIC QP */ + cqp_head = nesdev->cqp.sq_head; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + nesvnic->nic.qp_id); + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + + /* Destroy NIC CQ */ + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_DESTROY_CQ | ((u32)nesvnic->nic_cq.cq_size << 16))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + (nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16))); + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + + nesdev->cqp.sq_head = cqp_head; + barrier(); + + /* Ring doorbell (2 WQEs) */ + nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id); + + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u," + " cqp.sq_tail=%u, cqp.sq_size=%u\n", + cqp_head, nesdev->cqp.sq_head, + nesdev->cqp.sq_tail, nesdev->cqp.sq_size); + + ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head), + NES_EVENT_TIMEOUT); + + nes_debug(NES_DBG_SHUTDOWN, "Destroy NIC QP returned, wait_event_timeout ret = %u, cqp_head=%u," + " cqp.sq_head=%u, cqp.sq_tail=%u\n", + ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail); + if (!ret) { + nes_debug(NES_DBG_SHUTDOWN, "NIC QP%u destroy timeout expired\n", + nesvnic->nic.qp_id); + } + + pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase, + nesvnic->nic_pbase); + + /* restore old wqm_cfg0 value */ + nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0); +} + +/** + * nes_napi_isr + */ +int nes_napi_isr(struct nes_device *nesdev) +{ + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 int_stat; + + if (nesdev->napi_isr_ran) { + /* interrupt status has already been read in ISR */ + int_stat = nesdev->int_stat; + } else { + int_stat = nes_read32(nesdev->regs + NES_INT_STAT); + nesdev->int_stat = int_stat; + nesdev->napi_isr_ran = 1; + } + + int_stat &= nesdev->int_req; + /* iff NIC, process here, else wait for DPC */ + if ((int_stat) && ((int_stat & 0x0000ff00) == int_stat)) { + nesdev->napi_isr_ran = 0; + nes_write32(nesdev->regs + NES_INT_STAT, + (int_stat & + ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 | NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3))); + + /* Process the CEQs */ + nes_process_ceq(nesdev, &nesdev->nesadapter->ceq[nesdev->nic_ceq_index]); + + if (unlikely((((nesadapter->et_rx_coalesce_usecs_irq) && + (!nesadapter->et_use_adaptive_rx_coalesce)) || + ((nesadapter->et_use_adaptive_rx_coalesce) && + (nesdev->deepcq_count > nesadapter->et_pkt_rate_low))))) { + if ((nesdev->int_req & NES_INT_TIMER) == 0) { + /* Enable Periodic timer interrupts */ + nesdev->int_req |= NES_INT_TIMER; + /* ack any pending periodic timer interrupts so we don't get an immediate interrupt */ + /* TODO: need to also ack other unused periodic timer values, get from nesadapter */ + nes_write32(nesdev->regs+NES_TIMER_STAT, + nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req)); + nes_write32(nesdev->regs+NES_INTF_INT_MASK, + ~(nesdev->intf_int_req | NES_INTF_PERIODIC_TIMER)); + } + + if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) + { + nes_nic_init_timer(nesdev); + } + /* Enable interrupts, except CEQs */ + nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req)); + } else { + /* Enable interrupts, make sure timer is off */ + nesdev->int_req &= ~NES_INT_TIMER; + nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); + nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); + } + nesdev->deepcq_count = 0; + return 1; + } else { + return 0; + } +} + +static void process_critical_error(struct nes_device *nesdev) +{ + u32 debug_error; + u32 nes_idx_debug_error_masks0 = 0; + u16 error_module = 0; + + debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS); + printk(KERN_ERR PFX "Critical Error reported by device!!! 0x%02X\n", + (u16)debug_error); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS, + 0x01010000 | (debug_error & 0x0000ffff)); + if (crit_err_count++ > 10) + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17); + error_module = (u16) (debug_error & 0x1F00) >> 8; + if (++nesdev->nesadapter->crit_error_count[error_module-1] >= + nes_max_critical_error_count) { + printk(KERN_ERR PFX "Masking off critical error for module " + "0x%02X\n", (u16)error_module); + nes_idx_debug_error_masks0 = nes_read_indexed(nesdev, + NES_IDX_DEBUG_ERROR_MASKS0); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0, + nes_idx_debug_error_masks0 | (1 << error_module)); + } +} +/** + * nes_dpc + */ +void nes_dpc(unsigned long param) +{ + struct nes_device *nesdev = (struct nes_device *)param; + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 counter; + u32 loop_counter = 0; + u32 int_status_bit; + u32 int_stat; + u32 timer_stat; + u32 temp_int_stat; + u32 intf_int_stat; + u32 processed_intf_int = 0; + u16 processed_timer_int = 0; + u16 completion_ints = 0; + u16 timer_ints = 0; + + /* nes_debug(NES_DBG_ISR, "\n"); */ + + do { + timer_stat = 0; + if (nesdev->napi_isr_ran) { + nesdev->napi_isr_ran = 0; + int_stat = nesdev->int_stat; + } else + int_stat = nes_read32(nesdev->regs+NES_INT_STAT); + if (processed_intf_int != 0) + int_stat &= nesdev->int_req & ~NES_INT_INTF; + else + int_stat &= nesdev->int_req; + if (processed_timer_int == 0) { + processed_timer_int = 1; + if (int_stat & NES_INT_TIMER) { + timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT); + if ((timer_stat & nesdev->timer_int_req) == 0) { + int_stat &= ~NES_INT_TIMER; + } + } + } else { + int_stat &= ~NES_INT_TIMER; + } + + if (int_stat) { + if (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0| + NES_INT_MAC1|NES_INT_MAC2 | NES_INT_MAC3)) { + /* Ack the interrupts */ + nes_write32(nesdev->regs+NES_INT_STAT, + (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0| + NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3))); + } + + temp_int_stat = int_stat; + for (counter = 0, int_status_bit = 1; counter < 16; counter++) { + if (int_stat & int_status_bit) { + nes_process_ceq(nesdev, &nesadapter->ceq[counter]); + temp_int_stat &= ~int_status_bit; + completion_ints = 1; + } + if (!(temp_int_stat & 0x0000ffff)) + break; + int_status_bit <<= 1; + } + + /* Process the AEQ for this pci function */ + int_status_bit = 1 << (16 + PCI_FUNC(nesdev->pcidev->devfn)); + if (int_stat & int_status_bit) { + nes_process_aeq(nesdev, &nesadapter->aeq[PCI_FUNC(nesdev->pcidev->devfn)]); + } + + /* Process the MAC interrupt for this pci function */ + int_status_bit = 1 << (24 + nesdev->mac_index); + if (int_stat & int_status_bit) { + nes_process_mac_intr(nesdev, nesdev->mac_index); + } + + if (int_stat & NES_INT_TIMER) { + if (timer_stat & nesdev->timer_int_req) { + nes_write32(nesdev->regs + NES_TIMER_STAT, + (timer_stat & nesdev->timer_int_req) | + ~(nesdev->nesadapter->timer_int_req)); + timer_ints = 1; + } + } + + if (int_stat & NES_INT_INTF) { + processed_intf_int = 1; + intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT); + intf_int_stat &= nesdev->intf_int_req; + if (NES_INTF_INT_CRITERR & intf_int_stat) { + process_critical_error(nesdev); + } + if (NES_INTF_INT_PCIERR & intf_int_stat) { + printk(KERN_ERR PFX "PCI Error reported by device!!!\n"); + BUG(); + } + if (NES_INTF_INT_AEQ_OFLOW & intf_int_stat) { + printk(KERN_ERR PFX "AEQ Overflow reported by device!!!\n"); + BUG(); + } + nes_write32(nesdev->regs+NES_INTF_INT_STAT, intf_int_stat); + } + + if (int_stat & NES_INT_TSW) { + } + } + /* Don't use the interface interrupt bit stay in loop */ + int_stat &= ~NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 | + NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3; + } while ((int_stat != 0) && (loop_counter++ < MAX_DPC_ITERATIONS)); + + if (timer_ints == 1) { + if ((nesadapter->et_rx_coalesce_usecs_irq) || (nesadapter->et_use_adaptive_rx_coalesce)) { + if (completion_ints == 0) { + nesdev->timer_only_int_count++; + if (nesdev->timer_only_int_count>=nesadapter->timer_int_limit) { + nesdev->timer_only_int_count = 0; + nesdev->int_req &= ~NES_INT_TIMER; + nes_write32(nesdev->regs + NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); + nes_write32(nesdev->regs + NES_INT_MASK, ~nesdev->int_req); + } else { + nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req)); + } + } else { + if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) + { + nes_nic_init_timer(nesdev); + } + nesdev->timer_only_int_count = 0; + nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req)); + } + } else { + nesdev->timer_only_int_count = 0; + nesdev->int_req &= ~NES_INT_TIMER; + nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); + nes_write32(nesdev->regs+NES_TIMER_STAT, + nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req)); + nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); + } + } else { + if ( (completion_ints == 1) && + (((nesadapter->et_rx_coalesce_usecs_irq) && + (!nesadapter->et_use_adaptive_rx_coalesce)) || + ((nesdev->deepcq_count > nesadapter->et_pkt_rate_low) && + (nesadapter->et_use_adaptive_rx_coalesce) )) ) { + /* nes_debug(NES_DBG_ISR, "Enabling periodic timer interrupt.\n" ); */ + nesdev->timer_only_int_count = 0; + nesdev->int_req |= NES_INT_TIMER; + nes_write32(nesdev->regs+NES_TIMER_STAT, + nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req)); + nes_write32(nesdev->regs+NES_INTF_INT_MASK, + ~(nesdev->intf_int_req | NES_INTF_PERIODIC_TIMER)); + nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req)); + } else { + nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); + } + } + nesdev->deepcq_count = 0; +} + + +/** + * nes_process_ceq + */ +static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq) +{ + u64 u64temp; + struct nes_hw_cq *cq; + u32 head; + u32 ceq_size; + + /* nes_debug(NES_DBG_CQ, "\n"); */ + head = ceq->ceq_head; + ceq_size = ceq->ceq_size; + + do { + if (le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]) & + NES_CEQE_VALID) { + u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]))) << 32) | + ((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_LOW_IDX]))); + u64temp <<= 1; + cq = *((struct nes_hw_cq **)&u64temp); + /* nes_debug(NES_DBG_CQ, "pCQ = %p\n", cq); */ + barrier(); + ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX] = 0; + + /* call the event handler */ + cq->ce_handler(nesdev, cq); + + if (++head >= ceq_size) + head = 0; + } else { + break; + } + + } while (1); + + ceq->ceq_head = head; +} + + +/** + * nes_process_aeq + */ +static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq) +{ + /* u64 u64temp; */ + u32 head; + u32 aeq_size; + u32 aeqe_misc; + u32 aeqe_cq_id; + struct nes_hw_aeqe volatile *aeqe; + + head = aeq->aeq_head; + aeq_size = aeq->aeq_size; + + do { + aeqe = &aeq->aeq_vbase[head]; + if ((le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]) & NES_AEQE_VALID) == 0) + break; + aeqe_misc = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); + aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]); + if (aeqe_misc & (NES_AEQE_QP|NES_AEQE_CQ)) { + if (aeqe_cq_id >= NES_FIRST_QPN) { + /* dealing with an accelerated QP related AE */ + /* + * u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX]))) << 32) | + * ((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]))); + */ + nes_process_iwarp_aeqe(nesdev, (struct nes_hw_aeqe *)aeqe); + } else { + /* TODO: dealing with a CQP related AE */ + nes_debug(NES_DBG_AEQ, "Processing CQP related AE, misc = 0x%04X\n", + (u16)(aeqe_misc >> 16)); + } + } + + aeqe->aeqe_words[NES_AEQE_MISC_IDX] = 0; + + if (++head >= aeq_size) + head = 0; + + nes_write32(nesdev->regs + NES_AEQ_ALLOC, 1 << 16); + } + while (1); + aeq->aeq_head = head; +} + +static void nes_reset_link(struct nes_device *nesdev, u32 mac_index) +{ + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 reset_value; + u32 i=0; + u32 u32temp; + + if (nesadapter->hw_rev == NE020_REV) { + return; + } + mh_detected++; + + reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); + + if ((mac_index == 0) || ((mac_index == 1) && (nesadapter->OneG_Mode))) + reset_value |= 0x0000001d; + else + reset_value |= 0x0000002d; + + if (4 <= (nesadapter->link_interrupt_count[mac_index] / ((u16)NES_MAX_LINK_INTERRUPTS))) { + if ((!nesadapter->OneG_Mode) && (nesadapter->port_count == 2)) { + nesadapter->link_interrupt_count[0] = 0; + nesadapter->link_interrupt_count[1] = 0; + u32temp = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); + if (0x00000040 & u32temp) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088); + else + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8); + + reset_value |= 0x0000003d; + } + nesadapter->link_interrupt_count[mac_index] = 0; + } + + nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value); + + while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) + & 0x00000040) != 0x00000040) && (i++ < 5000)); + + if (0x0000003d == (reset_value & 0x0000003d)) { + u32 pcs_control_status0, pcs_control_status1; + + for (i = 0; i < 10; i++) { + pcs_control_status0 = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0); + pcs_control_status1 = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); + if (((0x0F000000 == (pcs_control_status0 & 0x0F000000)) + && (pcs_control_status0 & 0x00100000)) + || ((0x0F000000 == (pcs_control_status1 & 0x0F000000)) + && (pcs_control_status1 & 0x00100000))) + continue; + else + break; + } + if (10 == i) { + u32temp = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); + if (0x00000040 & u32temp) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088); + else + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8); + + nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value); + + while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) + & 0x00000040) != 0x00000040) && (i++ < 5000)); + } + } +} + +/** + * nes_process_mac_intr + */ +static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) +{ + unsigned long flags; + u32 pcs_control_status; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_vnic *nesvnic; + u32 mac_status; + u32 mac_index = nesdev->mac_index; + u32 u32temp; + u16 phy_data; + u16 temp_phy_data; + u32 pcs_val = 0x0f0f0000; + u32 pcs_mask = 0x0f1f0000; + u32 cdr_ctrl; + + spin_lock_irqsave(&nesadapter->phy_lock, flags); + if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) { + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + return; + } + nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_INTERRUPT; + + /* ack the MAC interrupt */ + mac_status = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200)); + /* Clear the interrupt */ + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200), mac_status); + + nes_debug(NES_DBG_PHY, "MAC%u interrupt status = 0x%X.\n", mac_number, mac_status); + + if (mac_status & (NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT)) { + nesdev->link_status_interrupts++; + if (0 == (++nesadapter->link_interrupt_count[mac_index] % ((u16)NES_MAX_LINK_INTERRUPTS))) + nes_reset_link(nesdev, mac_index); + + /* read the PHY interrupt status register */ + if ((nesadapter->OneG_Mode) && + (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) { + do { + nes_read_1G_phy_reg(nesdev, 0x1a, + nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy%d data from register 0x1a = 0x%X.\n", + nesadapter->phy_index[mac_index], phy_data); + } while (phy_data&0x8000); + + temp_phy_data = 0; + do { + nes_read_1G_phy_reg(nesdev, 0x11, + nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy%d data from register 0x11 = 0x%X.\n", + nesadapter->phy_index[mac_index], phy_data); + if (temp_phy_data == phy_data) + break; + temp_phy_data = phy_data; + } while (1); + + nes_read_1G_phy_reg(nesdev, 0x1e, + nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "Phy%d data from register 0x1e = 0x%X.\n", + nesadapter->phy_index[mac_index], phy_data); + + nes_read_1G_phy_reg(nesdev, 1, + nesadapter->phy_index[mac_index], &phy_data); + nes_debug(NES_DBG_PHY, "1G phy%u data from register 1 = 0x%X\n", + nesadapter->phy_index[mac_index], phy_data); + + if (temp_phy_data & 0x1000) { + nes_debug(NES_DBG_PHY, "The Link is up according to the PHY\n"); + phy_data = 4; + } else { + nes_debug(NES_DBG_PHY, "The Link is down according to the PHY\n"); + } + } + nes_debug(NES_DBG_PHY, "Eth SERDES Common Status: 0=0x%08X, 1=0x%08X\n", + nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0), + nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0+0x200)); + + if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_PUMA_1G) { + switch (mac_index) { + case 1: + case 3: + pcs_control_status = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); + break; + default: + pcs_control_status = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0); + break; + } + } else { + pcs_control_status = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200)); + pcs_control_status = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200)); + } + + nes_debug(NES_DBG_PHY, "PCS PHY Control/Status%u: 0x%08X\n", + mac_index, pcs_control_status); + if ((nesadapter->OneG_Mode) && + (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) { + u32temp = 0x01010000; + if (nesadapter->port_count > 2) { + u32temp |= 0x02020000; + } + if ((pcs_control_status & u32temp)!= u32temp) { + phy_data = 0; + nes_debug(NES_DBG_PHY, "PCS says the link is down\n"); + } + } else { + switch (nesadapter->phy_type[mac_index]) { + case NES_PHY_TYPE_ARGUS: + case NES_PHY_TYPE_SFP_D: + case NES_PHY_TYPE_KR: + /* clear the alarms */ + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc002); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc005); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc006); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9004); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9005); + /* check link status */ + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); + nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); + phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + + phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0; + + nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", + __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP"); + break; + + case NES_PHY_TYPE_PUMA_1G: + if (mac_index < 2) + pcs_val = pcs_mask = 0x01010000; + else + pcs_val = pcs_mask = 0x02020000; + /* fall through */ + default: + phy_data = (pcs_val == (pcs_control_status & pcs_mask)) ? 0x4 : 0x0; + break; + } + } + + if (phy_data & 0x0004) { + if (wide_ppm_offset && + (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) && + (nesadapter->hw_rev != NE020_REV)) { + cdr_ctrl = nes_read_indexed(nesdev, + NES_IDX_ETH_SERDES_CDR_CONTROL0 + + mac_index * 0x200); + nes_write_indexed(nesdev, + NES_IDX_ETH_SERDES_CDR_CONTROL0 + + mac_index * 0x200, + cdr_ctrl | 0x000F0000); + } + nesadapter->mac_link_down[mac_index] = 0; + list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { + nes_debug(NES_DBG_PHY, "The Link is UP!!. linkup was %d\n", + nesvnic->linkup); + if (nesvnic->linkup == 0) { + printk(PFX "The Link is now up for port %s, netdev %p.\n", + nesvnic->netdev->name, nesvnic->netdev); + if (netif_queue_stopped(nesvnic->netdev)) + netif_start_queue(nesvnic->netdev); + nesvnic->linkup = 1; + netif_carrier_on(nesvnic->netdev); + + spin_lock(&nesvnic->port_ibevent_lock); + if (nesvnic->of_device_registered) { + if (nesdev->iw_status == 0) { + nesdev->iw_status = 1; + nes_port_ibevent(nesvnic); + } + } + spin_unlock(&nesvnic->port_ibevent_lock); + } + } + } else { + if (wide_ppm_offset && + (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) && + (nesadapter->hw_rev != NE020_REV)) { + cdr_ctrl = nes_read_indexed(nesdev, + NES_IDX_ETH_SERDES_CDR_CONTROL0 + + mac_index * 0x200); + nes_write_indexed(nesdev, + NES_IDX_ETH_SERDES_CDR_CONTROL0 + + mac_index * 0x200, + cdr_ctrl & 0xFFF0FFFF); + } + nesadapter->mac_link_down[mac_index] = 1; + list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { + nes_debug(NES_DBG_PHY, "The Link is Down!!. linkup was %d\n", + nesvnic->linkup); + if (nesvnic->linkup == 1) { + printk(PFX "The Link is now down for port %s, netdev %p.\n", + nesvnic->netdev->name, nesvnic->netdev); + if (!(netif_queue_stopped(nesvnic->netdev))) + netif_stop_queue(nesvnic->netdev); + nesvnic->linkup = 0; + netif_carrier_off(nesvnic->netdev); + + spin_lock(&nesvnic->port_ibevent_lock); + if (nesvnic->of_device_registered) { + if (nesdev->iw_status == 1) { + nesdev->iw_status = 0; + nes_port_ibevent(nesvnic); + } + } + spin_unlock(&nesvnic->port_ibevent_lock); + } + } + } + if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_SFP_D) { + if (nesdev->link_recheck) + cancel_delayed_work(&nesdev->work); + nesdev->link_recheck = 1; + schedule_delayed_work(&nesdev->work, + NES_LINK_RECHECK_DELAY); + } + } + + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + + nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_IDLE; +} + +void nes_recheck_link_status(struct work_struct *work) +{ + unsigned long flags; + struct nes_device *nesdev = container_of(work, struct nes_device, work.work); + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_vnic *nesvnic; + u32 mac_index = nesdev->mac_index; + u16 phy_data; + u16 temp_phy_data; + + spin_lock_irqsave(&nesadapter->phy_lock, flags); + + /* check link status */ + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); + nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); + phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + + phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0; + + nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", + __func__, phy_data, + nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP"); + + if (phy_data & 0x0004) { + nesadapter->mac_link_down[mac_index] = 0; + list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { + if (nesvnic->linkup == 0) { + printk(PFX "The Link is now up for port %s, netdev %p.\n", + nesvnic->netdev->name, nesvnic->netdev); + if (netif_queue_stopped(nesvnic->netdev)) + netif_start_queue(nesvnic->netdev); + nesvnic->linkup = 1; + netif_carrier_on(nesvnic->netdev); + + spin_lock(&nesvnic->port_ibevent_lock); + if (nesvnic->of_device_registered) { + if (nesdev->iw_status == 0) { + nesdev->iw_status = 1; + nes_port_ibevent(nesvnic); + } + } + spin_unlock(&nesvnic->port_ibevent_lock); + } + } + + } else { + nesadapter->mac_link_down[mac_index] = 1; + list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { + if (nesvnic->linkup == 1) { + printk(PFX "The Link is now down for port %s, netdev %p.\n", + nesvnic->netdev->name, nesvnic->netdev); + if (!(netif_queue_stopped(nesvnic->netdev))) + netif_stop_queue(nesvnic->netdev); + nesvnic->linkup = 0; + netif_carrier_off(nesvnic->netdev); + + spin_lock(&nesvnic->port_ibevent_lock); + if (nesvnic->of_device_registered) { + if (nesdev->iw_status == 1) { + nesdev->iw_status = 0; + nes_port_ibevent(nesvnic); + } + } + spin_unlock(&nesvnic->port_ibevent_lock); + } + } + } + if (nesdev->link_recheck++ < NES_LINK_RECHECK_MAX) + schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY); + else + nesdev->link_recheck = 0; + + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); +} + + +static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) +{ + struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq); + + napi_schedule(&nesvnic->napi); +} + + +/* The MAX_RQES_TO_PROCESS defines how many max read requests to complete before +* getting out of nic_ce_handler +*/ +#define MAX_RQES_TO_PROCESS 384 + +/** + * nes_nic_ce_handler + */ +void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) +{ + u64 u64temp; + dma_addr_t bus_address; + struct nes_hw_nic *nesnic; + struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq); + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_nic_rq_wqe *nic_rqe; + struct nes_hw_nic_sq_wqe *nic_sqe; + struct sk_buff *skb; + struct sk_buff *rx_skb; + struct nes_rskb_cb *cb; + __le16 *wqe_fragment_length; + u32 head; + u32 cq_size; + u32 rx_pkt_size; + u32 cqe_count=0; + u32 cqe_errv; + u32 cqe_misc; + u16 wqe_fragment_index = 1; /* first fragment (0) is used by copy buffer */ + u16 vlan_tag; + u16 pkt_type; + u16 rqes_processed = 0; + u8 sq_cqes = 0; + u8 nes_use_lro = 0; + + head = cq->cq_head; + cq_size = cq->cq_size; + cq->cqes_pending = 1; + if (nesvnic->netdev->features & NETIF_F_LRO) + nes_use_lro = 1; + do { + if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) & + NES_NIC_CQE_VALID) { + nesnic = &nesvnic->nic; + cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]); + if (cqe_misc & NES_NIC_CQE_SQ) { + sq_cqes++; + wqe_fragment_index = 1; + nic_sqe = &nesnic->sq_vbase[nesnic->sq_tail]; + skb = nesnic->tx_skb[nesnic->sq_tail]; + wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; + /* bump past the vlan tag */ + wqe_fragment_length++; + if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) { + u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX + + wqe_fragment_index * 2]); + u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX + + wqe_fragment_index * 2])) << 32; + bus_address = (dma_addr_t)u64temp; + if (test_and_clear_bit(nesnic->sq_tail, nesnic->first_frag_overflow)) { + pci_unmap_single(nesdev->pcidev, + bus_address, + le16_to_cpu(wqe_fragment_length[wqe_fragment_index++]), + PCI_DMA_TODEVICE); + } + for (; wqe_fragment_index < 5; wqe_fragment_index++) { + if (wqe_fragment_length[wqe_fragment_index]) { + u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX + + wqe_fragment_index * 2]); + u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX + + wqe_fragment_index * 2])) <<32; + bus_address = (dma_addr_t)u64temp; + pci_unmap_page(nesdev->pcidev, + bus_address, + le16_to_cpu(wqe_fragment_length[wqe_fragment_index]), + PCI_DMA_TODEVICE); + } else + break; + } + } + if (skb) + dev_kfree_skb_any(skb); + nesnic->sq_tail++; + nesnic->sq_tail &= nesnic->sq_size-1; + if (sq_cqes > 128) { + barrier(); + /* restart the queue if it had been stopped */ + if (netif_queue_stopped(nesvnic->netdev)) + netif_wake_queue(nesvnic->netdev); + sq_cqes = 0; + } + } else { + rqes_processed ++; + + cq->rx_cqes_completed++; + cq->rx_pkts_indicated++; + rx_pkt_size = cqe_misc & 0x0000ffff; + nic_rqe = &nesnic->rq_vbase[nesnic->rq_tail]; + /* Get the skb */ + rx_skb = nesnic->rx_skb[nesnic->rq_tail]; + nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_tail]; + bus_address = (dma_addr_t)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]); + bus_address += ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32; + pci_unmap_single(nesdev->pcidev, bus_address, + nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + cb = (struct nes_rskb_cb *)&rx_skb->cb[0]; + cb->busaddr = 0; + /* rx_skb->tail = rx_skb->data + rx_pkt_size; */ + /* rx_skb->len = rx_pkt_size; */ + rx_skb->len = 0; /* TODO: see if this is necessary */ + skb_put(rx_skb, rx_pkt_size); + rx_skb->protocol = eth_type_trans(rx_skb, nesvnic->netdev); + nesnic->rq_tail++; + nesnic->rq_tail &= nesnic->rq_size - 1; + + atomic_inc(&nesvnic->rx_skbs_needed); + if (atomic_read(&nesvnic->rx_skbs_needed) > (nesvnic->nic.rq_size>>1)) { + nes_write32(nesdev->regs+NES_CQE_ALLOC, + cq->cq_number | (cqe_count << 16)); + /* nesadapter->tune_timer.cq_count += cqe_count; */ + nesdev->currcq_count += cqe_count; + cqe_count = 0; + nes_replenish_nic_rq(nesvnic); + } + pkt_type = (u16)(le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX])); + cqe_errv = (cqe_misc & NES_NIC_CQE_ERRV_MASK) >> NES_NIC_CQE_ERRV_SHIFT; + rx_skb->ip_summed = CHECKSUM_NONE; + + if ((NES_PKT_TYPE_TCPV4_BITS == (pkt_type & NES_PKT_TYPE_TCPV4_MASK)) || + (NES_PKT_TYPE_UDPV4_BITS == (pkt_type & NES_PKT_TYPE_UDPV4_MASK))) { + if ((cqe_errv & + (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR | + NES_NIC_ERRV_BITS_IPH_ERR | NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) { + if (nesvnic->netdev->features & NETIF_F_RXCSUM) + rx_skb->ip_summed = CHECKSUM_UNNECESSARY; + } else + nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed TCP or UDP packet." + " errv = 0x%X, pkt_type = 0x%X.\n", + nesvnic->netdev->name, cqe_errv, pkt_type); + + } else if ((pkt_type & NES_PKT_TYPE_IPV4_MASK) == NES_PKT_TYPE_IPV4_BITS) { + if ((cqe_errv & + (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_IPH_ERR | + NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) { + if (nesvnic->netdev->features & NETIF_F_RXCSUM) { + rx_skb->ip_summed = CHECKSUM_UNNECESSARY; + /* nes_debug(NES_DBG_CQ, "%s: Reporting successfully checksummed IPv4 packet.\n", + nesvnic->netdev->name); */ + } + } else + nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed TCP or UDP packet." + " errv = 0x%X, pkt_type = 0x%X.\n", + nesvnic->netdev->name, cqe_errv, pkt_type); + } + /* nes_debug(NES_DBG_CQ, "pkt_type=%x, APBVT_MASK=%x\n", + pkt_type, (pkt_type & NES_PKT_TYPE_APBVT_MASK)); */ + + if ((pkt_type & NES_PKT_TYPE_APBVT_MASK) == NES_PKT_TYPE_APBVT_BITS) { + if (nes_cm_recv(rx_skb, nesvnic->netdev)) + rx_skb = NULL; + } + if (rx_skb == NULL) + goto skip_rx_indicate0; + + + if (cqe_misc & NES_NIC_CQE_TAG_VALID) { + vlan_tag = (u16)(le32_to_cpu( + cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX]) + >> 16); + nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n", + nesvnic->netdev->name, vlan_tag); + + __vlan_hwaccel_put_tag(rx_skb, vlan_tag); + } + if (nes_use_lro) + lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL); + else + netif_receive_skb(rx_skb); + +skip_rx_indicate0: + ; + /* nesvnic->netstats.rx_packets++; */ + /* nesvnic->netstats.rx_bytes += rx_pkt_size; */ + } + + cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0; + /* Accounting... */ + cqe_count++; + if (++head >= cq_size) + head = 0; + if (cqe_count == 255) { + /* Replenish Nic CQ */ + nes_write32(nesdev->regs+NES_CQE_ALLOC, + cq->cq_number | (cqe_count << 16)); + /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */ + nesdev->currcq_count += cqe_count; + cqe_count = 0; + } + + if (cq->rx_cqes_completed >= nesvnic->budget) + break; + } else { + cq->cqes_pending = 0; + break; + } + + } while (1); + + if (nes_use_lro) + lro_flush_all(&nesvnic->lro_mgr); + if (sq_cqes) { + barrier(); + /* restart the queue if it had been stopped */ + if (netif_queue_stopped(nesvnic->netdev)) + netif_wake_queue(nesvnic->netdev); + } + cq->cq_head = head; + /* nes_debug(NES_DBG_CQ, "CQ%u Processed = %u cqes, new head = %u.\n", + cq->cq_number, cqe_count, cq->cq_head); */ + cq->cqe_allocs_pending = cqe_count; + if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) + { + /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */ + nesdev->currcq_count += cqe_count; + nes_nic_tune_timer(nesdev); + } + if (atomic_read(&nesvnic->rx_skbs_needed)) + nes_replenish_nic_rq(nesvnic); +} + + + +/** + * nes_cqp_ce_handler + */ +static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) +{ + u64 u64temp; + unsigned long flags; + struct nes_hw_cqp *cqp = NULL; + struct nes_cqp_request *cqp_request; + struct nes_hw_cqp_wqe *cqp_wqe; + u32 head; + u32 cq_size; + u32 cqe_count=0; + u32 error_code; + u32 opcode; + u32 ctx_index; + /* u32 counter; */ + + head = cq->cq_head; + cq_size = cq->cq_size; + + do { + /* process the CQE */ + /* nes_debug(NES_DBG_CQP, "head=%u cqe_words=%08X\n", head, + le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])); */ + + opcode = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]); + if (opcode & NES_CQE_VALID) { + cqp = &nesdev->cqp; + + error_code = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX]); + if (error_code) { + nes_debug(NES_DBG_CQP, "Bad Completion code for opcode 0x%02X from CQP," + " Major/Minor codes = 0x%04X:%04X.\n", + le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])&0x3f, + (u16)(error_code >> 16), + (u16)error_code); + } + + u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head]. + cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) | + ((u64)(le32_to_cpu(cq->cq_vbase[head]. + cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]))); + + cqp_request = (struct nes_cqp_request *)(unsigned long)u64temp; + if (cqp_request) { + if (cqp_request->waiting) { + /* nes_debug(NES_DBG_CQP, "%s: Waking up requestor\n"); */ + cqp_request->major_code = (u16)(error_code >> 16); + cqp_request->minor_code = (u16)error_code; + barrier(); + cqp_request->request_done = 1; + wake_up(&cqp_request->waitq); + nes_put_cqp_request(nesdev, cqp_request); + } else { + if (cqp_request->callback) + cqp_request->cqp_callback(nesdev, cqp_request); + nes_free_cqp_request(nesdev, cqp_request); + } + } else { + wake_up(&nesdev->cqp.waitq); + } + + cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0; + nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (1 << 16)); + if (++cqp->sq_tail >= cqp->sq_size) + cqp->sq_tail = 0; + + /* Accounting... */ + cqe_count++; + if (++head >= cq_size) + head = 0; + } else { + break; + } + } while (1); + cq->cq_head = head; + + spin_lock_irqsave(&nesdev->cqp.lock, flags); + while ((!list_empty(&nesdev->cqp_pending_reqs)) && + ((((nesdev->cqp.sq_tail+nesdev->cqp.sq_size)-nesdev->cqp.sq_head) & + (nesdev->cqp.sq_size - 1)) != 1)) { + cqp_request = list_entry(nesdev->cqp_pending_reqs.next, + struct nes_cqp_request, list); + list_del_init(&cqp_request->list); + head = nesdev->cqp.sq_head++; + nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; + cqp_wqe = &nesdev->cqp.sq_vbase[head]; + memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe)); + barrier(); + + opcode = cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]; + if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT) + ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX; + else + ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX; + cqp_wqe->wqe_words[ctx_index] = + cpu_to_le32((u32)((unsigned long)cqp_request)); + cqp_wqe->wqe_words[ctx_index + 1] = + cpu_to_le32((u32)(upper_32_bits((unsigned long)cqp_request))); + nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) put on CQPs SQ wqe%u.\n", + cqp_request, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, head); + /* Ring doorbell (1 WQEs) */ + barrier(); + nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id); + } + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + + /* Arm the CCQ */ + nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | + cq->cq_number); + nes_read32(nesdev->regs+NES_CQE_ALLOC); +} + +static u8 *locate_mpa(u8 *pkt, u32 aeq_info) +{ + if (aeq_info & NES_AEQE_Q2_DATA_ETHERNET) { + /* skip over ethernet header */ + pkt += ETH_HLEN; + + /* Skip over IP and TCP headers */ + pkt += 4 * (pkt[0] & 0x0f); + pkt += 4 * ((pkt[12] >> 4) & 0x0f); + } + return pkt; +} + +/* Determine if incoming error pkt is rdma layer */ +static u32 iwarp_opcode(struct nes_qp *nesqp, u32 aeq_info) +{ + u8 *pkt; + u16 *mpa; + u32 opcode = 0xffffffff; + + if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) { + pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET; + mpa = (u16 *)locate_mpa(pkt, aeq_info); + opcode = be16_to_cpu(mpa[1]) & 0xf; + } + + return opcode; +} + +/* Build iWARP terminate header */ +static int nes_bld_terminate_hdr(struct nes_qp *nesqp, u16 async_event_id, u32 aeq_info) +{ + u8 *pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET; + u16 ddp_seg_len; + int copy_len = 0; + u8 is_tagged = 0; + u8 flush_code = 0; + struct nes_terminate_hdr *termhdr; + + termhdr = (struct nes_terminate_hdr *)nesqp->hwqp.q2_vbase; + memset(termhdr, 0, 64); + + if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) { + + /* Use data from offending packet to fill in ddp & rdma hdrs */ + pkt = locate_mpa(pkt, aeq_info); + ddp_seg_len = be16_to_cpu(*(u16 *)pkt); + if (ddp_seg_len) { + copy_len = 2; + termhdr->hdrct = DDP_LEN_FLAG; + if (pkt[2] & 0x80) { + is_tagged = 1; + if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) { + copy_len += TERM_DDP_LEN_TAGGED; + termhdr->hdrct |= DDP_HDR_FLAG; + } + } else { + if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) { + copy_len += TERM_DDP_LEN_UNTAGGED; + termhdr->hdrct |= DDP_HDR_FLAG; + } + + if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN)) { + if ((pkt[3] & RDMA_OPCODE_MASK) == RDMA_READ_REQ_OPCODE) { + copy_len += TERM_RDMA_LEN; + termhdr->hdrct |= RDMA_HDR_FLAG; + } + } + } + } + } + + switch (async_event_id) { + case NES_AEQE_AEID_AMP_UNALLOCATED_STAG: + switch (iwarp_opcode(nesqp, aeq_info)) { + case IWARP_OPCODE_WRITE: + flush_code = IB_WC_LOC_PROT_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER; + termhdr->error_code = DDP_TAGGED_INV_STAG; + break; + default: + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_INV_STAG; + } + break; + case NES_AEQE_AEID_AMP_INVALID_STAG: + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_INV_STAG; + break; + case NES_AEQE_AEID_AMP_BAD_QP: + flush_code = IB_WC_LOC_QP_OP_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; + termhdr->error_code = DDP_UNTAGGED_INV_QN; + break; + case NES_AEQE_AEID_AMP_BAD_STAG_KEY: + case NES_AEQE_AEID_AMP_BAD_STAG_INDEX: + switch (iwarp_opcode(nesqp, aeq_info)) { + case IWARP_OPCODE_SEND_INV: + case IWARP_OPCODE_SEND_SE_INV: + flush_code = IB_WC_REM_OP_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP; + termhdr->error_code = RDMAP_CANT_INV_STAG; + break; + default: + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_INV_STAG; + } + break; + case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION: + if (aeq_info & (NES_AEQE_Q2_DATA_ETHERNET | NES_AEQE_Q2_DATA_MPA)) { + flush_code = IB_WC_LOC_PROT_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER; + termhdr->error_code = DDP_TAGGED_BOUNDS; + } else { + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_INV_BOUNDS; + } + break; + case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION: + case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS: + case NES_AEQE_AEID_PRIV_OPERATION_DENIED: + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_ACCESS; + break; + case NES_AEQE_AEID_AMP_TO_WRAP: + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_TO_WRAP; + break; + case NES_AEQE_AEID_AMP_BAD_PD: + switch (iwarp_opcode(nesqp, aeq_info)) { + case IWARP_OPCODE_WRITE: + flush_code = IB_WC_LOC_PROT_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER; + termhdr->error_code = DDP_TAGGED_UNASSOC_STAG; + break; + case IWARP_OPCODE_SEND_INV: + case IWARP_OPCODE_SEND_SE_INV: + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_CANT_INV_STAG; + break; + default: + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_UNASSOC_STAG; + } + break; + case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH: + flush_code = IB_WC_LOC_LEN_ERR; + termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP; + termhdr->error_code = MPA_MARKER; + break; + case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR: + flush_code = IB_WC_GENERAL_ERR; + termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP; + termhdr->error_code = MPA_CRC; + break; + case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE: + case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL: + flush_code = IB_WC_LOC_LEN_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC; + termhdr->error_code = DDP_CATASTROPHIC_LOCAL; + break; + case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC: + case NES_AEQE_AEID_DDP_NO_L_BIT: + flush_code = IB_WC_FATAL_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC; + termhdr->error_code = DDP_CATASTROPHIC_LOCAL; + break; + case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN: + case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID: + flush_code = IB_WC_GENERAL_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; + termhdr->error_code = DDP_UNTAGGED_INV_MSN_RANGE; + break; + case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: + flush_code = IB_WC_LOC_LEN_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; + termhdr->error_code = DDP_UNTAGGED_INV_TOO_LONG; + break; + case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION: + flush_code = IB_WC_GENERAL_ERR; + if (is_tagged) { + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER; + termhdr->error_code = DDP_TAGGED_INV_DDP_VER; + } else { + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; + termhdr->error_code = DDP_UNTAGGED_INV_DDP_VER; + } + break; + case NES_AEQE_AEID_DDP_UBE_INVALID_MO: + flush_code = IB_WC_GENERAL_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; + termhdr->error_code = DDP_UNTAGGED_INV_MO; + break; + case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE: + flush_code = IB_WC_REM_OP_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; + termhdr->error_code = DDP_UNTAGGED_INV_MSN_NO_BUF; + break; + case NES_AEQE_AEID_DDP_UBE_INVALID_QN: + flush_code = IB_WC_GENERAL_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; + termhdr->error_code = DDP_UNTAGGED_INV_QN; + break; + case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION: + flush_code = IB_WC_GENERAL_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP; + termhdr->error_code = RDMAP_INV_RDMAP_VER; + break; + case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE: + flush_code = IB_WC_LOC_QP_OP_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP; + termhdr->error_code = RDMAP_UNEXPECTED_OP; + break; + default: + flush_code = IB_WC_FATAL_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP; + termhdr->error_code = RDMAP_UNSPECIFIED; + break; + } + + if (copy_len) + memcpy(termhdr + 1, pkt, copy_len); + + if ((flush_code) && ((NES_AEQE_INBOUND_RDMA & aeq_info) == 0)) { + if (aeq_info & NES_AEQE_SQ) + nesqp->term_sq_flush_code = flush_code; + else + nesqp->term_rq_flush_code = flush_code; + } + + return sizeof(struct nes_terminate_hdr) + copy_len; +} + +static void nes_terminate_connection(struct nes_device *nesdev, struct nes_qp *nesqp, + struct nes_hw_aeqe *aeqe, enum ib_event_type eventtype) +{ + u64 context; + unsigned long flags; + u32 aeq_info; + u16 async_event_id; + u8 tcp_state; + u8 iwarp_state; + u32 termlen = 0; + u32 mod_qp_flags = NES_CQP_QP_IWARP_STATE_TERMINATE | + NES_CQP_QP_TERM_DONT_SEND_FIN; + struct nes_adapter *nesadapter = nesdev->nesadapter; + + if (nesqp->term_flags & NES_TERM_SENT) + return; /* Sanity check */ + + aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); + tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; + iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; + async_event_id = (u16)aeq_info; + + context = (unsigned long)nesadapter->qp_table[le32_to_cpu( + aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN]; + if (!context) { + WARN_ON(!context); + return; + } + + nesqp = (struct nes_qp *)(unsigned long)context; + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + nesqp->terminate_eventtype = eventtype; + spin_unlock_irqrestore(&nesqp->lock, flags); + + if (nesadapter->send_term_ok) + termlen = nes_bld_terminate_hdr(nesqp, async_event_id, aeq_info); + else + mod_qp_flags |= NES_CQP_QP_TERM_DONT_SEND_TERM_MSG; + + if (!nesdev->iw_status) { + nesqp->term_flags = NES_TERM_DONE; + nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_ERROR, 0, 0); + nes_cm_disconn(nesqp); + } else { + nes_terminate_start_timer(nesqp); + nesqp->term_flags |= NES_TERM_SENT; + nes_hw_modify_qp(nesdev, nesqp, mod_qp_flags, termlen, 0); + } +} + +static void nes_terminate_send_fin(struct nes_device *nesdev, + struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe) +{ + u32 aeq_info; + u16 async_event_id; + u8 tcp_state; + u8 iwarp_state; + unsigned long flags; + + aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); + tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; + iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; + async_event_id = (u16)aeq_info; + + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + spin_unlock_irqrestore(&nesqp->lock, flags); + + /* Send the fin only */ + nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_TERMINATE | + NES_CQP_QP_TERM_DONT_SEND_TERM_MSG, 0, 0); +} + +/* Cleanup after a terminate sent or received */ +static void nes_terminate_done(struct nes_qp *nesqp, int timeout_occurred) +{ + u32 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR; + unsigned long flags; + struct nes_vnic *nesvnic = to_nesvnic(nesqp->ibqp.device); + struct nes_device *nesdev = nesvnic->nesdev; + u8 first_time = 0; + + spin_lock_irqsave(&nesqp->lock, flags); + if (nesqp->hte_added) { + nesqp->hte_added = 0; + next_iwarp_state |= NES_CQP_QP_DEL_HTE; + } + + first_time = (nesqp->term_flags & NES_TERM_DONE) == 0; + nesqp->term_flags |= NES_TERM_DONE; + spin_unlock_irqrestore(&nesqp->lock, flags); + + /* Make sure we go through this only once */ + if (first_time) { + if (timeout_occurred == 0) + del_timer(&nesqp->terminate_timer); + else + next_iwarp_state |= NES_CQP_QP_RESET; + + nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0); + nes_cm_disconn(nesqp); + } +} + +static void nes_terminate_received(struct nes_device *nesdev, + struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe) +{ + u32 aeq_info; + u8 *pkt; + u32 *mpa; + u8 ddp_ctl; + u8 rdma_ctl; + u16 aeq_id = 0; + + aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); + if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) { + /* Terminate is not a performance path so the silicon */ + /* did not validate the frame - do it now */ + pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET; + mpa = (u32 *)locate_mpa(pkt, aeq_info); + ddp_ctl = (be32_to_cpu(mpa[0]) >> 8) & 0xff; + rdma_ctl = be32_to_cpu(mpa[0]) & 0xff; + if ((ddp_ctl & 0xc0) != 0x40) + aeq_id = NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC; + else if ((ddp_ctl & 0x03) != 1) + aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION; + else if (be32_to_cpu(mpa[2]) != 2) + aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_QN; + else if (be32_to_cpu(mpa[3]) != 1) + aeq_id = NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN; + else if (be32_to_cpu(mpa[4]) != 0) + aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_MO; + else if ((rdma_ctl & 0xc0) != 0x40) + aeq_id = NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION; + + if (aeq_id) { + /* Bad terminate recvd - send back a terminate */ + aeq_info = (aeq_info & 0xffff0000) | aeq_id; + aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info); + nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL); + return; + } + } + + nesqp->term_flags |= NES_TERM_RCVD; + nesqp->terminate_eventtype = IB_EVENT_QP_FATAL; + nes_terminate_start_timer(nesqp); + nes_terminate_send_fin(nesdev, nesqp, aeqe); +} + +/* Timeout routine in case terminate fails to complete */ +static void nes_terminate_timeout(unsigned long context) +{ + struct nes_qp *nesqp = (struct nes_qp *)(unsigned long)context; + + nes_terminate_done(nesqp, 1); +} + +/* Set a timer in case hw cannot complete the terminate sequence */ +static void nes_terminate_start_timer(struct nes_qp *nesqp) +{ + init_timer(&nesqp->terminate_timer); + nesqp->terminate_timer.function = nes_terminate_timeout; + nesqp->terminate_timer.expires = jiffies + HZ; + nesqp->terminate_timer.data = (unsigned long)nesqp; + add_timer(&nesqp->terminate_timer); +} + +/** + * nes_process_iwarp_aeqe + */ +static void nes_process_iwarp_aeqe(struct nes_device *nesdev, + struct nes_hw_aeqe *aeqe) +{ + u64 context; + unsigned long flags; + struct nes_qp *nesqp; + struct nes_hw_cq *hw_cq; + struct nes_cq *nescq; + int resource_allocated; + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 aeq_info; + u32 next_iwarp_state = 0; + u32 aeqe_cq_id; + u16 async_event_id; + u8 tcp_state; + u8 iwarp_state; + struct ib_event ibevent; + + nes_debug(NES_DBG_AEQ, "\n"); + aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); + if ((NES_AEQE_INBOUND_RDMA & aeq_info) || (!(NES_AEQE_QP & aeq_info))) { + context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]); + context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32; + } else { + context = (unsigned long)nesadapter->qp_table[le32_to_cpu( + aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN]; + BUG_ON(!context); + } + + /* context is nesqp unless async_event_id == CQ ERROR */ + nesqp = (struct nes_qp *)(unsigned long)context; + async_event_id = (u16)aeq_info; + tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; + iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; + nes_debug(NES_DBG_AEQ, "aeid = 0x%04X, qp-cq id = %d, aeqe = %p," + " Tcp state = %s, iWARP state = %s\n", + async_event_id, + le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe, + nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]); + + aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]); + if (aeq_info & NES_AEQE_QP) { + if (!nes_is_resource_allocated(nesadapter, + nesadapter->allocated_qps, + aeqe_cq_id)) + return; + } + + switch (async_event_id) { + case NES_AEQE_AEID_LLP_FIN_RECEIVED: + if (nesqp->term_flags) + return; /* Ignore it, wait for close complete */ + + if (atomic_inc_return(&nesqp->close_timer_started) == 1) { + if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) && + (nesqp->ibqp_state == IB_QPS_RTS)) { + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; + nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0); + nes_cm_disconn(nesqp); + } + nesqp->cm_id->add_ref(nesqp->cm_id); + schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp, + NES_TIMER_TYPE_CLOSE, 1, 0); + nes_debug(NES_DBG_AEQ, "QP%u Not decrementing QP refcount (%d)," + " need ae to finish up, original_last_aeq = 0x%04X." + " last_aeq = 0x%04X, scheduling timer. TCP state = %d\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), + async_event_id, nesqp->last_aeq, tcp_state); + } + break; + case NES_AEQE_AEID_LLP_CLOSE_COMPLETE: + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_cm_disconn(nesqp); + break; + + case NES_AEQE_AEID_RESET_SENT: + tcp_state = NES_AEQE_TCP_STATE_CLOSED; + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + nesqp->hte_added = 0; + spin_unlock_irqrestore(&nesqp->lock, flags); + next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE; + nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0); + nes_cm_disconn(nesqp); + break; + + case NES_AEQE_AEID_LLP_CONNECTION_RESET: + if (atomic_read(&nesqp->close_timer_started)) + return; + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_cm_disconn(nesqp); + break; + + case NES_AEQE_AEID_TERMINATE_SENT: + nes_terminate_send_fin(nesdev, nesqp, aeqe); + break; + + case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED: + nes_terminate_received(nesdev, nesqp, aeqe); + break; + + case NES_AEQE_AEID_AMP_BAD_STAG_KEY: + case NES_AEQE_AEID_AMP_BAD_STAG_INDEX: + case NES_AEQE_AEID_AMP_UNALLOCATED_STAG: + case NES_AEQE_AEID_AMP_INVALID_STAG: + case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION: + case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS: + case NES_AEQE_AEID_PRIV_OPERATION_DENIED: + case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: + case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION: + case NES_AEQE_AEID_AMP_TO_WRAP: + printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_ACCESS_ERR\n", + nesqp->hwqp.qp_id, async_event_id); + nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR); + break; + + case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE: + case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL: + case NES_AEQE_AEID_DDP_UBE_INVALID_MO: + case NES_AEQE_AEID_DDP_UBE_INVALID_QN: + if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) { + aeq_info &= 0xffff0000; + aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE; + aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info); + } + + case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE: + case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES: + case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE: + case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR: + case NES_AEQE_AEID_AMP_BAD_QP: + case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH: + case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC: + case NES_AEQE_AEID_DDP_NO_L_BIT: + case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN: + case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID: + case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION: + case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION: + case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE: + case NES_AEQE_AEID_AMP_BAD_PD: + case NES_AEQE_AEID_AMP_FASTREG_SHARED: + case NES_AEQE_AEID_AMP_FASTREG_VALID_STAG: + case NES_AEQE_AEID_AMP_FASTREG_MW_STAG: + case NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS: + case NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW: + case NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH: + case NES_AEQE_AEID_AMP_INVALIDATE_SHARED: + case NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS: + case NES_AEQE_AEID_AMP_MWBIND_VALID_STAG: + case NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG: + case NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG: + case NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG: + case NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS: + case NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS: + case NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT: + case NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED: + case NES_AEQE_AEID_BAD_CLOSE: + case NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO: + case NES_AEQE_AEID_STAG_ZERO_INVALID: + case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST: + case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP: + printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_FATAL\n", + nesqp->hwqp.qp_id, async_event_id); + print_ip(nesqp->cm_node); + if (!atomic_read(&nesqp->close_timer_started)) + nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL); + break; + + case NES_AEQE_AEID_CQ_OPERATION_ERROR: + context <<= 1; + nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u, %p\n", + le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), (void *)(unsigned long)context); + resource_allocated = nes_is_resource_allocated(nesadapter, nesadapter->allocated_cqs, + le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])); + if (resource_allocated) { + printk(KERN_ERR PFX "%s: Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u\n", + __func__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])); + hw_cq = (struct nes_hw_cq *)(unsigned long)context; + if (hw_cq) { + nescq = container_of(hw_cq, struct nes_cq, hw_cq); + if (nescq->ibcq.event_handler) { + ibevent.device = nescq->ibcq.device; + ibevent.event = IB_EVENT_CQ_ERR; + ibevent.element.cq = &nescq->ibcq; + nescq->ibcq.event_handler(&ibevent, nescq->ibcq.cq_context); + } + } + } + break; + + default: + nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n", + async_event_id); + break; + } + +} + +/** + * nes_iwarp_ce_handler + */ +void nes_iwarp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *hw_cq) +{ + struct nes_cq *nescq = container_of(hw_cq, struct nes_cq, hw_cq); + + /* nes_debug(NES_DBG_CQ, "Processing completion event for iWARP CQ%u.\n", + nescq->hw_cq.cq_number); */ + nes_write32(nesdev->regs+NES_CQ_ACK, nescq->hw_cq.cq_number); + + if (nescq->ibcq.comp_handler) + nescq->ibcq.comp_handler(&nescq->ibcq, nescq->ibcq.cq_context); + + return; +} + + +/** + * nes_manage_apbvt() + */ +int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port, + u32 nic_index, u32 add_port) +{ + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + int ret = 0; + u16 major_code; + + /* Send manage APBVT request to CQP */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n"); + return -ENOMEM; + } + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + + nes_debug(NES_DBG_QP, "%s APBV for local port=%u(0x%04x), nic_index=%u\n", + (add_port == NES_MANAGE_APBVT_ADD) ? "ADD" : "DEL", + accel_local_port, accel_local_port, nic_index); + + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, (NES_CQP_MANAGE_APBVT | + ((add_port == NES_MANAGE_APBVT_ADD) ? NES_CQP_APBVT_ADD : 0))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + ((nic_index << NES_CQP_APBVT_NIC_SHIFT) | accel_local_port)); + + nes_debug(NES_DBG_QP, "Waiting for CQP completion for APBVT.\n"); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request); + + if (add_port == NES_MANAGE_APBVT_ADD) + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_QP, "Completed, ret=%u, CQP Major:Minor codes = 0x%04X:0x%04X\n", + ret, cqp_request->major_code, cqp_request->minor_code); + major_code = cqp_request->major_code; + + nes_put_cqp_request(nesdev, cqp_request); + + if (!ret) + return -ETIME; + else if (major_code) + return -EIO; + else + return 0; +} + + +/** + * nes_manage_arp_cache + */ +void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, + u32 ip_addr, u32 action) +{ + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev; + struct nes_cqp_request *cqp_request; + int arp_index; + + nesdev = nesvnic->nesdev; + arp_index = nes_arp_table(nesdev, ip_addr, mac_addr, action); + if (arp_index == -1) { + return; + } + + /* update the ARP entry */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_NETDEV, "Failed to get a cqp_request.\n"); + return; + } + cqp_request->waiting = 0; + cqp_wqe = &cqp_request->cqp_wqe; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( + NES_CQP_MANAGE_ARP_CACHE | NES_CQP_ARP_PERM); + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32( + (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_CQP_ARP_AEQ_INDEX_SHIFT); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(arp_index); + + if (action == NES_ARP_ADD) { + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_ARP_VALID); + cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = cpu_to_le32( + (((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) | + (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]); + cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32( + (((u32)mac_addr[0]) << 16) | (u32)mac_addr[1]); + } else { + cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = 0; + } + + nes_debug(NES_DBG_NETDEV, "Not waiting for CQP, cqp.sq_head=%u, cqp.sq_tail=%u\n", + nesdev->cqp.sq_head, nesdev->cqp.sq_tail); + + atomic_set(&cqp_request->refcount, 1); + nes_post_cqp_request(nesdev, cqp_request); +} + + +/** + * flush_wqes + */ +void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, + u32 which_wq, u32 wait_completion) +{ + struct nes_cqp_request *cqp_request; + struct nes_hw_cqp_wqe *cqp_wqe; + u32 sq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH; + u32 rq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH; + int ret; + + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n"); + return; + } + if (wait_completion) { + cqp_request->waiting = 1; + atomic_set(&cqp_request->refcount, 2); + } else { + cqp_request->waiting = 0; + } + cqp_wqe = &cqp_request->cqp_wqe; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + /* If wqe in error was identified, set code to be put into cqe */ + if ((nesqp->term_sq_flush_code) && (which_wq & NES_CQP_FLUSH_SQ)) { + which_wq |= NES_CQP_FLUSH_MAJ_MIN; + sq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_sq_flush_code; + nesqp->term_sq_flush_code = 0; + } + + if ((nesqp->term_rq_flush_code) && (which_wq & NES_CQP_FLUSH_RQ)) { + which_wq |= NES_CQP_FLUSH_MAJ_MIN; + rq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_rq_flush_code; + nesqp->term_rq_flush_code = 0; + } + + if (which_wq & NES_CQP_FLUSH_MAJ_MIN) { + cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_SQ_CODE] = cpu_to_le32(sq_code); + cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_RQ_CODE] = cpu_to_le32(rq_code); + } + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = + cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id); + + nes_post_cqp_request(nesdev, cqp_request); + + if (wait_completion) { + /* Wait for CQP */ + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_QP, "Flush SQ QP WQEs completed, ret=%u," + " CQP Major:Minor codes = 0x%04X:0x%04X\n", + ret, cqp_request->major_code, cqp_request->minor_code); + nes_put_cqp_request(nesdev, cqp_request); + } +} diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h new file mode 100644 index 00000000..d748e4b3 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -0,0 +1,1392 @@ +/* +* Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#ifndef __NES_HW_H +#define __NES_HW_H + +#include + +#define NES_PHY_TYPE_CX4 1 +#define NES_PHY_TYPE_1G 2 +#define NES_PHY_TYPE_ARGUS 4 +#define NES_PHY_TYPE_PUMA_1G 5 +#define NES_PHY_TYPE_PUMA_10G 6 +#define NES_PHY_TYPE_GLADIUS 7 +#define NES_PHY_TYPE_SFP_D 8 +#define NES_PHY_TYPE_KR 9 + +#define NES_MULTICAST_PF_MAX 8 +#define NES_A0 3 + +#define NES_ENABLE_PAU 0x07000001 +#define NES_DISABLE_PAU 0x07000000 +#define NES_PAU_COUNTER 10 +#define NES_CQP_OPCODE_MASK 0x3f + +enum pci_regs { + NES_INT_STAT = 0x0000, + NES_INT_MASK = 0x0004, + NES_INT_PENDING = 0x0008, + NES_INTF_INT_STAT = 0x000C, + NES_INTF_INT_MASK = 0x0010, + NES_TIMER_STAT = 0x0014, + NES_PERIODIC_CONTROL = 0x0018, + NES_ONE_SHOT_CONTROL = 0x001C, + NES_EEPROM_COMMAND = 0x0020, + NES_EEPROM_DATA = 0x0024, + NES_FLASH_COMMAND = 0x0028, + NES_FLASH_DATA = 0x002C, + NES_SOFTWARE_RESET = 0x0030, + NES_CQ_ACK = 0x0034, + NES_WQE_ALLOC = 0x0040, + NES_CQE_ALLOC = 0x0044, + NES_AEQ_ALLOC = 0x0048 +}; + +enum indexed_regs { + NES_IDX_CREATE_CQP_LOW = 0x0000, + NES_IDX_CREATE_CQP_HIGH = 0x0004, + NES_IDX_QP_CONTROL = 0x0040, + NES_IDX_FLM_CONTROL = 0x0080, + NES_IDX_INT_CPU_STATUS = 0x00a0, + NES_IDX_GPR_TRIGGER = 0x00bc, + NES_IDX_GPIO_CONTROL = 0x00f0, + NES_IDX_GPIO_DATA = 0x00f4, + NES_IDX_GPR2 = 0x010c, + NES_IDX_TCP_CONFIG0 = 0x01e4, + NES_IDX_TCP_TIMER_CONFIG = 0x01ec, + NES_IDX_TCP_NOW = 0x01f0, + NES_IDX_QP_MAX_CFG_SIZES = 0x0200, + NES_IDX_QP_CTX_SIZE = 0x0218, + NES_IDX_TCP_TIMER_SIZE0 = 0x0238, + NES_IDX_TCP_TIMER_SIZE1 = 0x0240, + NES_IDX_ARP_CACHE_SIZE = 0x0258, + NES_IDX_CQ_CTX_SIZE = 0x0260, + NES_IDX_MRT_SIZE = 0x0278, + NES_IDX_PBL_REGION_SIZE = 0x0280, + NES_IDX_IRRQ_COUNT = 0x02b0, + NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x02f0, + NES_IDX_RX_WINDOW_BUFFER_SIZE = 0x0300, + NES_IDX_DST_IP_ADDR = 0x0400, + NES_IDX_PCIX_DIAG = 0x08e8, + NES_IDX_MPP_DEBUG = 0x0a00, + NES_IDX_PORT_RX_DISCARDS = 0x0a30, + NES_IDX_PORT_TX_DISCARDS = 0x0a34, + NES_IDX_MPP_LB_DEBUG = 0x0b00, + NES_IDX_DENALI_CTL_22 = 0x1058, + NES_IDX_MAC_TX_CONTROL = 0x2000, + NES_IDX_MAC_TX_CONFIG = 0x2004, + NES_IDX_MAC_TX_PAUSE_QUANTA = 0x2008, + NES_IDX_MAC_RX_CONTROL = 0x200c, + NES_IDX_MAC_RX_CONFIG = 0x2010, + NES_IDX_MAC_EXACT_MATCH_BOTTOM = 0x201c, + NES_IDX_MAC_MDIO_CONTROL = 0x2084, + NES_IDX_MAC_TX_OCTETS_LOW = 0x2100, + NES_IDX_MAC_TX_OCTETS_HIGH = 0x2104, + NES_IDX_MAC_TX_FRAMES_LOW = 0x2108, + NES_IDX_MAC_TX_FRAMES_HIGH = 0x210c, + NES_IDX_MAC_TX_PAUSE_FRAMES = 0x2118, + NES_IDX_MAC_TX_ERRORS = 0x2138, + NES_IDX_MAC_RX_OCTETS_LOW = 0x213c, + NES_IDX_MAC_RX_OCTETS_HIGH = 0x2140, + NES_IDX_MAC_RX_FRAMES_LOW = 0x2144, + NES_IDX_MAC_RX_FRAMES_HIGH = 0x2148, + NES_IDX_MAC_RX_BC_FRAMES_LOW = 0x214c, + NES_IDX_MAC_RX_MC_FRAMES_HIGH = 0x2150, + NES_IDX_MAC_RX_PAUSE_FRAMES = 0x2154, + NES_IDX_MAC_RX_SHORT_FRAMES = 0x2174, + NES_IDX_MAC_RX_OVERSIZED_FRAMES = 0x2178, + NES_IDX_MAC_RX_JABBER_FRAMES = 0x217c, + NES_IDX_MAC_RX_CRC_ERR_FRAMES = 0x2180, + NES_IDX_MAC_RX_LENGTH_ERR_FRAMES = 0x2184, + NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES = 0x2188, + NES_IDX_MAC_INT_STATUS = 0x21f0, + NES_IDX_MAC_INT_MASK = 0x21f4, + NES_IDX_PHY_PCS_CONTROL_STATUS0 = 0x2800, + NES_IDX_PHY_PCS_CONTROL_STATUS1 = 0x2a00, + NES_IDX_ETH_SERDES_COMMON_CONTROL0 = 0x2808, + NES_IDX_ETH_SERDES_COMMON_CONTROL1 = 0x2a08, + NES_IDX_ETH_SERDES_COMMON_STATUS0 = 0x280c, + NES_IDX_ETH_SERDES_COMMON_STATUS1 = 0x2a0c, + NES_IDX_ETH_SERDES_TX_EMP0 = 0x2810, + NES_IDX_ETH_SERDES_TX_EMP1 = 0x2a10, + NES_IDX_ETH_SERDES_TX_DRIVE0 = 0x2814, + NES_IDX_ETH_SERDES_TX_DRIVE1 = 0x2a14, + NES_IDX_ETH_SERDES_RX_MODE0 = 0x2818, + NES_IDX_ETH_SERDES_RX_MODE1 = 0x2a18, + NES_IDX_ETH_SERDES_RX_SIGDET0 = 0x281c, + NES_IDX_ETH_SERDES_RX_SIGDET1 = 0x2a1c, + NES_IDX_ETH_SERDES_BYPASS0 = 0x2820, + NES_IDX_ETH_SERDES_BYPASS1 = 0x2a20, + NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0 = 0x2824, + NES_IDX_ETH_SERDES_LOOPBACK_CONTROL1 = 0x2a24, + NES_IDX_ETH_SERDES_RX_EQ_CONTROL0 = 0x2828, + NES_IDX_ETH_SERDES_RX_EQ_CONTROL1 = 0x2a28, + NES_IDX_ETH_SERDES_RX_EQ_STATUS0 = 0x282c, + NES_IDX_ETH_SERDES_RX_EQ_STATUS1 = 0x2a2c, + NES_IDX_ETH_SERDES_CDR_RESET0 = 0x2830, + NES_IDX_ETH_SERDES_CDR_RESET1 = 0x2a30, + NES_IDX_ETH_SERDES_CDR_CONTROL0 = 0x2834, + NES_IDX_ETH_SERDES_CDR_CONTROL1 = 0x2a34, + NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0 = 0x2838, + NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1 = 0x2a38, + NES_IDX_ENDNODE0_NSTAT_RX_DISCARD = 0x3080, + NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO = 0x3000, + NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI = 0x3004, + NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO = 0x3008, + NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI = 0x300c, + NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO = 0x7000, + NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI = 0x7004, + NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO = 0x7008, + NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI = 0x700c, + NES_IDX_WQM_CONFIG0 = 0x5000, + NES_IDX_WQM_CONFIG1 = 0x5004, + NES_IDX_CM_CONFIG = 0x5100, + NES_IDX_NIC_LOGPORT_TO_PHYPORT = 0x6000, + NES_IDX_NIC_PHYPORT_TO_USW = 0x6008, + NES_IDX_NIC_ACTIVE = 0x6010, + NES_IDX_NIC_UNICAST_ALL = 0x6018, + NES_IDX_NIC_MULTICAST_ALL = 0x6020, + NES_IDX_NIC_MULTICAST_ENABLE = 0x6028, + NES_IDX_NIC_BROADCAST_ON = 0x6030, + NES_IDX_USED_CHUNKS_TX = 0x60b0, + NES_IDX_TX_POOL_SIZE = 0x60b8, + NES_IDX_QUAD_HASH_TABLE_SIZE = 0x6148, + NES_IDX_PERFECT_FILTER_LOW = 0x6200, + NES_IDX_PERFECT_FILTER_HIGH = 0x6204, + NES_IDX_IPV4_TCP_REXMITS = 0x7080, + NES_IDX_DEBUG_ERROR_CONTROL_STATUS = 0x913c, + NES_IDX_DEBUG_ERROR_MASKS0 = 0x9140, + NES_IDX_DEBUG_ERROR_MASKS1 = 0x9144, + NES_IDX_DEBUG_ERROR_MASKS2 = 0x9148, + NES_IDX_DEBUG_ERROR_MASKS3 = 0x914c, + NES_IDX_DEBUG_ERROR_MASKS4 = 0x9150, + NES_IDX_DEBUG_ERROR_MASKS5 = 0x9154, +}; + +#define NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE 1 +#define NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE (1 << 17) + +enum nes_cqp_opcodes { + NES_CQP_CREATE_QP = 0x00, + NES_CQP_MODIFY_QP = 0x01, + NES_CQP_DESTROY_QP = 0x02, + NES_CQP_CREATE_CQ = 0x03, + NES_CQP_MODIFY_CQ = 0x04, + NES_CQP_DESTROY_CQ = 0x05, + NES_CQP_ALLOCATE_STAG = 0x09, + NES_CQP_REGISTER_STAG = 0x0a, + NES_CQP_QUERY_STAG = 0x0b, + NES_CQP_REGISTER_SHARED_STAG = 0x0c, + NES_CQP_DEALLOCATE_STAG = 0x0d, + NES_CQP_MANAGE_ARP_CACHE = 0x0f, + NES_CQP_DOWNLOAD_SEGMENT = 0x10, + NES_CQP_SUSPEND_QPS = 0x11, + NES_CQP_UPLOAD_CONTEXT = 0x13, + NES_CQP_CREATE_CEQ = 0x16, + NES_CQP_DESTROY_CEQ = 0x18, + NES_CQP_CREATE_AEQ = 0x19, + NES_CQP_DESTROY_AEQ = 0x1b, + NES_CQP_LMI_ACCESS = 0x20, + NES_CQP_FLUSH_WQES = 0x22, + NES_CQP_MANAGE_APBVT = 0x23, + NES_CQP_MANAGE_QUAD_HASH = 0x25 +}; + +enum nes_cqp_wqe_word_idx { + NES_CQP_WQE_OPCODE_IDX = 0, + NES_CQP_WQE_ID_IDX = 1, + NES_CQP_WQE_COMP_CTX_LOW_IDX = 2, + NES_CQP_WQE_COMP_CTX_HIGH_IDX = 3, + NES_CQP_WQE_COMP_SCRATCH_LOW_IDX = 4, + NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX = 5, +}; + +enum nes_cqp_wqe_word_download_idx { /* format differs from other cqp ops */ + NES_CQP_WQE_DL_OPCODE_IDX = 0, + NES_CQP_WQE_DL_COMP_CTX_LOW_IDX = 1, + NES_CQP_WQE_DL_COMP_CTX_HIGH_IDX = 2, + NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX = 3 + /* For index values 4-15 use NES_NIC_SQ_WQE_ values */ +}; + +enum nes_cqp_cq_wqeword_idx { + NES_CQP_CQ_WQE_PBL_LOW_IDX = 6, + NES_CQP_CQ_WQE_PBL_HIGH_IDX = 7, + NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX = 8, + NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX = 9, + NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX = 10, +}; + +enum nes_cqp_stag_wqeword_idx { + NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX = 1, + NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX = 6, + NES_CQP_STAG_WQE_LEN_LOW_IDX = 7, + NES_CQP_STAG_WQE_STAG_IDX = 8, + NES_CQP_STAG_WQE_VA_LOW_IDX = 10, + NES_CQP_STAG_WQE_VA_HIGH_IDX = 11, + NES_CQP_STAG_WQE_PA_LOW_IDX = 12, + NES_CQP_STAG_WQE_PA_HIGH_IDX = 13, + NES_CQP_STAG_WQE_PBL_LEN_IDX = 14 +}; + +#define NES_CQP_OP_LOGICAL_PORT_SHIFT 26 +#define NES_CQP_OP_IWARP_STATE_SHIFT 28 +#define NES_CQP_OP_TERMLEN_SHIFT 28 + +enum nes_cqp_qp_bits { + NES_CQP_QP_ARP_VALID = (1<<8), + NES_CQP_QP_WINBUF_VALID = (1<<9), + NES_CQP_QP_CONTEXT_VALID = (1<<10), + NES_CQP_QP_ORD_VALID = (1<<11), + NES_CQP_QP_WINBUF_DATAIND_EN = (1<<12), + NES_CQP_QP_VIRT_WQS = (1<<13), + NES_CQP_QP_DEL_HTE = (1<<14), + NES_CQP_QP_CQS_VALID = (1<<15), + NES_CQP_QP_TYPE_TSA = 0, + NES_CQP_QP_TYPE_IWARP = (1<<16), + NES_CQP_QP_TYPE_CQP = (4<<16), + NES_CQP_QP_TYPE_NIC = (5<<16), + NES_CQP_QP_MSS_CHG = (1<<20), + NES_CQP_QP_STATIC_RESOURCES = (1<<21), + NES_CQP_QP_IGNORE_MW_BOUND = (1<<22), + NES_CQP_QP_VWQ_USE_LMI = (1<<23), + NES_CQP_QP_IWARP_STATE_IDLE = (1<netdev */ + u8 perfect_filter_index; + u8 nic_index; + u8 qp_nic_index[4]; + u8 next_qp_nic_index; + u8 of_device_registered; + u8 rdma_enabled; + u32 lro_max_aggr; + struct net_lro_mgr lro_mgr; + struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS]; + struct timer_list event_timer; + enum ib_event_type delayed_event; + enum ib_event_type last_dispatched_event; + spinlock_t port_ibevent_lock; + u32 mgt_mem_size; + void *mgt_vbase; + dma_addr_t mgt_pbase; + struct nes_vnic_mgt *mgtvnic[NES_MGT_QP_COUNT]; + struct task_struct *mgt_thread; + wait_queue_head_t mgt_wait_queue; + struct sk_buff_head mgt_skb_list; + +}; + +struct nes_ib_device { + struct ib_device ibdev; + struct nes_vnic *nesvnic; + + /* Virtual RNIC Limits */ + u32 max_mr; + u32 max_qp; + u32 max_cq; + u32 max_pd; + u32 num_mr; + u32 num_qp; + u32 num_cq; + u32 num_pd; +}; + +enum nes_hdrct_flags { + DDP_LEN_FLAG = 0x80, + DDP_HDR_FLAG = 0x40, + RDMA_HDR_FLAG = 0x20 +}; + +enum nes_term_layers { + LAYER_RDMA = 0, + LAYER_DDP = 1, + LAYER_MPA = 2 +}; + +enum nes_term_error_types { + RDMAP_CATASTROPHIC = 0, + RDMAP_REMOTE_PROT = 1, + RDMAP_REMOTE_OP = 2, + DDP_CATASTROPHIC = 0, + DDP_TAGGED_BUFFER = 1, + DDP_UNTAGGED_BUFFER = 2, + DDP_LLP = 3 +}; + +enum nes_term_rdma_errors { + RDMAP_INV_STAG = 0x00, + RDMAP_INV_BOUNDS = 0x01, + RDMAP_ACCESS = 0x02, + RDMAP_UNASSOC_STAG = 0x03, + RDMAP_TO_WRAP = 0x04, + RDMAP_INV_RDMAP_VER = 0x05, + RDMAP_UNEXPECTED_OP = 0x06, + RDMAP_CATASTROPHIC_LOCAL = 0x07, + RDMAP_CATASTROPHIC_GLOBAL = 0x08, + RDMAP_CANT_INV_STAG = 0x09, + RDMAP_UNSPECIFIED = 0xff +}; + +enum nes_term_ddp_errors { + DDP_CATASTROPHIC_LOCAL = 0x00, + DDP_TAGGED_INV_STAG = 0x00, + DDP_TAGGED_BOUNDS = 0x01, + DDP_TAGGED_UNASSOC_STAG = 0x02, + DDP_TAGGED_TO_WRAP = 0x03, + DDP_TAGGED_INV_DDP_VER = 0x04, + DDP_UNTAGGED_INV_QN = 0x01, + DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02, + DDP_UNTAGGED_INV_MSN_RANGE = 0x03, + DDP_UNTAGGED_INV_MO = 0x04, + DDP_UNTAGGED_INV_TOO_LONG = 0x05, + DDP_UNTAGGED_INV_DDP_VER = 0x06 +}; + +enum nes_term_mpa_errors { + MPA_CLOSED = 0x01, + MPA_CRC = 0x02, + MPA_MARKER = 0x03, + MPA_REQ_RSP = 0x04, +}; + +struct nes_terminate_hdr { + u8 layer_etype; + u8 error_code; + u8 hdrct; + u8 rsvd; +}; + +/* Used to determine how to fill in terminate error codes */ +#define IWARP_OPCODE_WRITE 0 +#define IWARP_OPCODE_READREQ 1 +#define IWARP_OPCODE_READRSP 2 +#define IWARP_OPCODE_SEND 3 +#define IWARP_OPCODE_SEND_INV 4 +#define IWARP_OPCODE_SEND_SE 5 +#define IWARP_OPCODE_SEND_SE_INV 6 +#define IWARP_OPCODE_TERM 7 + +/* These values are used only during terminate processing */ +#define TERM_DDP_LEN_TAGGED 14 +#define TERM_DDP_LEN_UNTAGGED 18 +#define TERM_RDMA_LEN 28 +#define RDMA_OPCODE_MASK 0x0f +#define RDMA_READ_REQ_OPCODE 1 +#define BAD_FRAME_OFFSET 64 +#define CQE_MAJOR_DRV 0x8000 + +/* Used for link status recheck after interrupt processing */ +#define NES_LINK_RECHECK_DELAY msecs_to_jiffies(50) +#define NES_LINK_RECHECK_MAX 60 + +#endif /* __NES_HW_H */ diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c new file mode 100644 index 00000000..3ba7be36 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -0,0 +1,1162 @@ +/* + * Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include +#include "nes.h" +#include "nes_mgt.h" + +atomic_t pau_qps_created; +atomic_t pau_qps_destroyed; + +static void nes_replenish_mgt_rq(struct nes_vnic_mgt *mgtvnic) +{ + unsigned long flags; + dma_addr_t bus_address; + struct sk_buff *skb; + struct nes_hw_nic_rq_wqe *nic_rqe; + struct nes_hw_mgt *nesmgt; + struct nes_device *nesdev; + struct nes_rskb_cb *cb; + u32 rx_wqes_posted = 0; + + nesmgt = &mgtvnic->mgt; + nesdev = mgtvnic->nesvnic->nesdev; + spin_lock_irqsave(&nesmgt->rq_lock, flags); + if (nesmgt->replenishing_rq != 0) { + if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) && + (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) { + atomic_set(&mgtvnic->rx_skb_timer_running, 1); + spin_unlock_irqrestore(&nesmgt->rq_lock, flags); + mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */ + add_timer(&mgtvnic->rq_wqes_timer); + } else { + spin_unlock_irqrestore(&nesmgt->rq_lock, flags); + } + return; + } + nesmgt->replenishing_rq = 1; + spin_unlock_irqrestore(&nesmgt->rq_lock, flags); + do { + skb = dev_alloc_skb(mgtvnic->nesvnic->max_frame_size); + if (skb) { + skb->dev = mgtvnic->nesvnic->netdev; + + bus_address = pci_map_single(nesdev->pcidev, + skb->data, mgtvnic->nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->busaddr = bus_address; + cb->maplen = mgtvnic->nesvnic->max_frame_size; + + nic_rqe = &nesmgt->rq_vbase[mgtvnic->mgt.rq_head]; + nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = + cpu_to_le32(mgtvnic->nesvnic->max_frame_size); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = + cpu_to_le32((u32)bus_address); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = + cpu_to_le32((u32)((u64)bus_address >> 32)); + nesmgt->rx_skb[nesmgt->rq_head] = skb; + nesmgt->rq_head++; + nesmgt->rq_head &= nesmgt->rq_size - 1; + atomic_dec(&mgtvnic->rx_skbs_needed); + barrier(); + if (++rx_wqes_posted == 255) { + nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id); + rx_wqes_posted = 0; + } + } else { + spin_lock_irqsave(&nesmgt->rq_lock, flags); + if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) && + (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) { + atomic_set(&mgtvnic->rx_skb_timer_running, 1); + spin_unlock_irqrestore(&nesmgt->rq_lock, flags); + mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */ + add_timer(&mgtvnic->rq_wqes_timer); + } else { + spin_unlock_irqrestore(&nesmgt->rq_lock, flags); + } + break; + } + } while (atomic_read(&mgtvnic->rx_skbs_needed)); + barrier(); + if (rx_wqes_posted) + nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id); + nesmgt->replenishing_rq = 0; +} + +/** + * nes_mgt_rq_wqes_timeout + */ +static void nes_mgt_rq_wqes_timeout(unsigned long parm) +{ + struct nes_vnic_mgt *mgtvnic = (struct nes_vnic_mgt *)parm; + + atomic_set(&mgtvnic->rx_skb_timer_running, 0); + if (atomic_read(&mgtvnic->rx_skbs_needed)) + nes_replenish_mgt_rq(mgtvnic); +} + +/** + * nes_mgt_free_skb - unmap and free skb + */ +static void nes_mgt_free_skb(struct nes_device *nesdev, struct sk_buff *skb, u32 dir) +{ + struct nes_rskb_cb *cb; + + cb = (struct nes_rskb_cb *)&skb->cb[0]; + pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, dir); + cb->busaddr = 0; + dev_kfree_skb_any(skb); +} + +/** + * nes_download_callback - handle download completions + */ +static void nes_download_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request) +{ + struct pau_fpdu_info *fpdu_info = cqp_request->cqp_callback_pointer; + struct nes_qp *nesqp = fpdu_info->nesqp; + struct sk_buff *skb; + int i; + + for (i = 0; i < fpdu_info->frag_cnt; i++) { + skb = fpdu_info->frags[i].skb; + if (fpdu_info->frags[i].cmplt) { + nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE); + nes_rem_ref_cm_node(nesqp->cm_node); + } + } + + if (fpdu_info->hdr_vbase) + pci_free_consistent(nesdev->pcidev, fpdu_info->hdr_len, + fpdu_info->hdr_vbase, fpdu_info->hdr_pbase); + kfree(fpdu_info); +} + +/** + * nes_get_seq - Get the seq, ack_seq and window from the packet + */ +static u32 nes_get_seq(struct sk_buff *skb, u32 *ack, u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd) +{ + struct nes_rskb_cb *cb = (struct nes_rskb_cb *)&skb->cb[0]; + struct iphdr *iph = (struct iphdr *)(cb->data_start + ETH_HLEN); + struct tcphdr *tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); + + *ack = be32_to_cpu(tcph->ack_seq); + *wnd = be16_to_cpu(tcph->window); + *fin_rcvd = tcph->fin; + *rst_rcvd = tcph->rst; + return be32_to_cpu(tcph->seq); +} + +/** + * nes_get_next_skb - Get the next skb based on where current skb is in the queue + */ +static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp *nesqp, + struct sk_buff *skb, u32 nextseq, u32 *ack, + u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd) +{ + u32 seq; + bool processacks; + struct sk_buff *old_skb; + + if (skb) { + /* Continue processing fpdu */ + if (skb->next == (struct sk_buff *)&nesqp->pau_list) + goto out; + skb = skb->next; + processacks = false; + } else { + /* Starting a new one */ + if (skb_queue_empty(&nesqp->pau_list)) + goto out; + skb = skb_peek(&nesqp->pau_list); + processacks = true; + } + + while (1) { + seq = nes_get_seq(skb, ack, wnd, fin_rcvd, rst_rcvd); + if (seq == nextseq) { + if (skb->len || processacks) + break; + } else if (after(seq, nextseq)) { + goto out; + } + + if (skb->next == (struct sk_buff *)&nesqp->pau_list) + goto out; + + old_skb = skb; + skb = skb->next; + skb_unlink(old_skb, &nesqp->pau_list); + nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE); + nes_rem_ref_cm_node(nesqp->cm_node); + } + return skb; + +out: + return NULL; +} + +/** + * get_fpdu_info - Find the next complete fpdu and return its fragments. + */ +static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, + struct pau_fpdu_info **pau_fpdu_info) +{ + struct sk_buff *skb; + struct iphdr *iph; + struct tcphdr *tcph; + struct nes_rskb_cb *cb; + struct pau_fpdu_info *fpdu_info = NULL; + struct pau_fpdu_frag frags[MAX_FPDU_FRAGS]; + unsigned long flags; + u32 fpdu_len = 0; + u32 tmp_len; + int frag_cnt = 0; + u32 tot_len; + u32 frag_tot; + u32 ack; + u32 fin_rcvd; + u32 rst_rcvd; + u16 wnd; + int i; + int rc = 0; + + *pau_fpdu_info = NULL; + + spin_lock_irqsave(&nesqp->pau_lock, flags); + skb = nes_get_next_skb(nesdev, nesqp, NULL, nesqp->pau_rcv_nxt, &ack, &wnd, &fin_rcvd, &rst_rcvd); + if (!skb) { + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + goto out; + } + cb = (struct nes_rskb_cb *)&skb->cb[0]; + if (skb->len) { + fpdu_len = be16_to_cpu(*(__be16 *) skb->data) + MPA_FRAMING; + fpdu_len = (fpdu_len + 3) & 0xfffffffc; + tmp_len = fpdu_len; + + /* See if we have all of the fpdu */ + frag_tot = 0; + memset(&frags, 0, sizeof frags); + for (i = 0; i < MAX_FPDU_FRAGS; i++) { + frags[i].physaddr = cb->busaddr; + frags[i].physaddr += skb->data - cb->data_start; + frags[i].frag_len = min(tmp_len, skb->len); + frags[i].skb = skb; + frags[i].cmplt = (skb->len == frags[i].frag_len); + frag_tot += frags[i].frag_len; + frag_cnt++; + + tmp_len -= frags[i].frag_len; + if (tmp_len == 0) + break; + + skb = nes_get_next_skb(nesdev, nesqp, skb, + nesqp->pau_rcv_nxt + frag_tot, &ack, &wnd, &fin_rcvd, &rst_rcvd); + if (!skb) { + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + goto out; + } else if (rst_rcvd) { + /* rst received in the middle of fpdu */ + for (; i >= 0; i--) { + skb_unlink(frags[i].skb, &nesqp->pau_list); + nes_mgt_free_skb(nesdev, frags[i].skb, PCI_DMA_TODEVICE); + } + cb = (struct nes_rskb_cb *)&skb->cb[0]; + frags[0].physaddr = cb->busaddr; + frags[0].physaddr += skb->data - cb->data_start; + frags[0].frag_len = skb->len; + frags[0].skb = skb; + frags[0].cmplt = true; + frag_cnt = 1; + break; + } + + cb = (struct nes_rskb_cb *)&skb->cb[0]; + } + } else { + /* no data */ + frags[0].physaddr = cb->busaddr; + frags[0].frag_len = 0; + frags[0].skb = skb; + frags[0].cmplt = true; + frag_cnt = 1; + } + + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + + /* Found one */ + fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC); + if (fpdu_info == NULL) { + nes_debug(NES_DBG_PAU, "Failed to alloc a fpdu_info.\n"); + rc = -ENOMEM; + goto out; + } + + fpdu_info->cqp_request = nes_get_cqp_request(nesdev); + if (fpdu_info->cqp_request == NULL) { + nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n"); + rc = -ENOMEM; + goto out; + } + + cb = (struct nes_rskb_cb *)&frags[0].skb->cb[0]; + iph = (struct iphdr *)(cb->data_start + ETH_HLEN); + tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); + fpdu_info->hdr_len = (((unsigned char *)tcph) + 4 * (tcph->doff)) - cb->data_start; + fpdu_info->data_len = fpdu_len; + tot_len = fpdu_info->hdr_len + fpdu_len - ETH_HLEN; + + if (frags[0].cmplt) { + fpdu_info->hdr_pbase = cb->busaddr; + fpdu_info->hdr_vbase = NULL; + } else { + fpdu_info->hdr_vbase = pci_alloc_consistent(nesdev->pcidev, + fpdu_info->hdr_len, &fpdu_info->hdr_pbase); + if (!fpdu_info->hdr_vbase) { + nes_debug(NES_DBG_PAU, "Unable to allocate memory for pau first frag\n"); + rc = -ENOMEM; + goto out; + } + + /* Copy hdrs, adjusting len and seqnum */ + memcpy(fpdu_info->hdr_vbase, cb->data_start, fpdu_info->hdr_len); + iph = (struct iphdr *)(fpdu_info->hdr_vbase + ETH_HLEN); + tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); + } + + iph->tot_len = cpu_to_be16(tot_len); + iph->saddr = cpu_to_be32(0x7f000001); + + tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt); + tcph->ack_seq = cpu_to_be32(ack); + tcph->window = cpu_to_be16(wnd); + + nesqp->pau_rcv_nxt += fpdu_len + fin_rcvd; + + memcpy(fpdu_info->frags, frags, sizeof(fpdu_info->frags)); + fpdu_info->frag_cnt = frag_cnt; + fpdu_info->nesqp = nesqp; + *pau_fpdu_info = fpdu_info; + + /* Update skb's for next pass */ + for (i = 0; i < frag_cnt; i++) { + cb = (struct nes_rskb_cb *)&frags[i].skb->cb[0]; + skb_pull(frags[i].skb, frags[i].frag_len); + + if (frags[i].skb->len == 0) { + /* Pull skb off the list - it will be freed in the callback */ + spin_lock_irqsave(&nesqp->pau_lock, flags); + skb_unlink(frags[i].skb, &nesqp->pau_list); + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + } else { + /* Last skb still has data so update the seq */ + iph = (struct iphdr *)(cb->data_start + ETH_HLEN); + tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); + tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt); + } + } + +out: + if (rc) { + if (fpdu_info) { + if (fpdu_info->cqp_request) + nes_put_cqp_request(nesdev, fpdu_info->cqp_request); + kfree(fpdu_info); + } + } + return rc; +} + +/** + * forward_fpdu - send complete fpdus, one at a time + */ +static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp) +{ + struct nes_device *nesdev = nesvnic->nesdev; + struct pau_fpdu_info *fpdu_info; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + u64 u64tmp; + u32 u32tmp; + int rc; + + while (1) { + rc = get_fpdu_info(nesdev, nesqp, &fpdu_info); + if (fpdu_info == NULL) + return rc; + + cqp_request = fpdu_info->cqp_request; + cqp_wqe = &cqp_request->cqp_wqe; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_OPCODE_IDX, + NES_CQP_DOWNLOAD_SEGMENT | + (((u32)nesvnic->logical_port) << NES_CQP_OP_LOGICAL_PORT_SHIFT)); + + u32tmp = fpdu_info->hdr_len << 16; + u32tmp |= fpdu_info->hdr_len + (u32)fpdu_info->data_len; + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX, + u32tmp); + + u32tmp = (fpdu_info->frags[1].frag_len << 16) | fpdu_info->frags[0].frag_len; + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_2_1_IDX, + u32tmp); + + u32tmp = (fpdu_info->frags[3].frag_len << 16) | fpdu_info->frags[2].frag_len; + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_4_3_IDX, + u32tmp); + + u64tmp = (u64)fpdu_info->hdr_pbase; + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX, + lower_32_bits(u64tmp)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_HIGH_IDX, + upper_32_bits(u64tmp >> 32)); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX, + lower_32_bits(fpdu_info->frags[0].physaddr)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_HIGH_IDX, + upper_32_bits(fpdu_info->frags[0].physaddr)); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_LOW_IDX, + lower_32_bits(fpdu_info->frags[1].physaddr)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_HIGH_IDX, + upper_32_bits(fpdu_info->frags[1].physaddr)); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_LOW_IDX, + lower_32_bits(fpdu_info->frags[2].physaddr)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_HIGH_IDX, + upper_32_bits(fpdu_info->frags[2].physaddr)); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_LOW_IDX, + lower_32_bits(fpdu_info->frags[3].physaddr)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_HIGH_IDX, + upper_32_bits(fpdu_info->frags[3].physaddr)); + + cqp_request->cqp_callback_pointer = fpdu_info; + cqp_request->callback = 1; + cqp_request->cqp_callback = nes_download_callback; + + atomic_set(&cqp_request->refcount, 1); + nes_post_cqp_request(nesdev, cqp_request); + } + + return 0; +} + +static void process_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp) +{ + int again = 1; + unsigned long flags; + + do { + /* Ignore rc - if it failed, tcp retries will cause it to try again */ + forward_fpdus(nesvnic, nesqp); + + spin_lock_irqsave(&nesqp->pau_lock, flags); + if (nesqp->pau_pending) { + nesqp->pau_pending = 0; + } else { + nesqp->pau_busy = 0; + again = 0; + } + + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + } while (again); +} + +/** + * queue_fpdus - Handle fpdu's that hw passed up to sw + */ +static void queue_fpdus(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp) +{ + struct sk_buff *tmpskb; + struct nes_rskb_cb *cb; + struct iphdr *iph; + struct tcphdr *tcph; + unsigned char *tcph_end; + u32 rcv_nxt; + u32 rcv_wnd; + u32 seqnum; + u32 len; + bool process_it = false; + unsigned long flags; + + /* Move data ptr to after tcp header */ + iph = (struct iphdr *)skb->data; + tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); + seqnum = be32_to_cpu(tcph->seq); + tcph_end = (((char *)tcph) + (4 * tcph->doff)); + + len = be16_to_cpu(iph->tot_len); + if (skb->len > len) + skb_trim(skb, len); + skb_pull(skb, tcph_end - skb->data); + + /* Initialize tracking values */ + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->seqnum = seqnum; + + /* Make sure data is in the receive window */ + rcv_nxt = nesqp->pau_rcv_nxt; + rcv_wnd = le32_to_cpu(nesqp->nesqp_context->rcv_wnd); + if (!between(seqnum, rcv_nxt, (rcv_nxt + rcv_wnd))) { + nes_mgt_free_skb(nesvnic->nesdev, skb, PCI_DMA_TODEVICE); + nes_rem_ref_cm_node(nesqp->cm_node); + return; + } + + spin_lock_irqsave(&nesqp->pau_lock, flags); + + if (nesqp->pau_busy) + nesqp->pau_pending = 1; + else + nesqp->pau_busy = 1; + + /* Queue skb by sequence number */ + if (skb_queue_len(&nesqp->pau_list) == 0) { + skb_queue_head(&nesqp->pau_list, skb); + } else { + tmpskb = nesqp->pau_list.next; + while (tmpskb != (struct sk_buff *)&nesqp->pau_list) { + cb = (struct nes_rskb_cb *)&tmpskb->cb[0]; + if (before(seqnum, cb->seqnum)) + break; + tmpskb = tmpskb->next; + } + skb_insert(tmpskb, skb, &nesqp->pau_list); + } + if (nesqp->pau_state == PAU_READY) + process_it = true; + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + + if (process_it) + process_fpdus(nesvnic, nesqp); + + return; +} + +/** + * mgt_thread - Handle mgt skbs in a safe context + */ +static int mgt_thread(void *context) +{ + struct nes_vnic *nesvnic = context; + struct sk_buff *skb; + struct nes_rskb_cb *cb; + + while (!kthread_should_stop()) { + wait_event_interruptible(nesvnic->mgt_wait_queue, + skb_queue_len(&nesvnic->mgt_skb_list) || kthread_should_stop()); + while ((skb_queue_len(&nesvnic->mgt_skb_list)) && !kthread_should_stop()) { + skb = skb_dequeue(&nesvnic->mgt_skb_list); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->data_start = skb->data - ETH_HLEN; + cb->busaddr = pci_map_single(nesvnic->nesdev->pcidev, cb->data_start, + nesvnic->max_frame_size, PCI_DMA_TODEVICE); + queue_fpdus(skb, nesvnic, cb->nesqp); + } + } + + /* Closing down so delete any entries on the queue */ + while (skb_queue_len(&nesvnic->mgt_skb_list)) { + skb = skb_dequeue(&nesvnic->mgt_skb_list); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + nes_rem_ref_cm_node(cb->nesqp->cm_node); + dev_kfree_skb_any(skb); + } + return 0; +} + +/** + * nes_queue_skbs - Queue skb so it can be handled in a thread context + */ +void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp) +{ + struct nes_rskb_cb *cb; + + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->nesqp = nesqp; + skb_queue_tail(&nesvnic->mgt_skb_list, skb); + wake_up_interruptible(&nesvnic->mgt_wait_queue); +} + +void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp) +{ + struct sk_buff *skb; + unsigned long flags; + atomic_inc(&pau_qps_destroyed); + + /* Free packets that have not yet been forwarded */ + /* Lock is acquired by skb_dequeue when removing the skb */ + spin_lock_irqsave(&nesqp->pau_lock, flags); + while (skb_queue_len(&nesqp->pau_list)) { + skb = skb_dequeue(&nesqp->pau_list); + nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE); + nes_rem_ref_cm_node(nesqp->cm_node); + } + spin_unlock_irqrestore(&nesqp->pau_lock, flags); +} + +static void nes_chg_qh_handler(struct nes_device *nesdev, struct nes_cqp_request *cqp_request) +{ + struct pau_qh_chg *qh_chg = cqp_request->cqp_callback_pointer; + struct nes_cqp_request *new_request; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_adapter *nesadapter; + struct nes_qp *nesqp; + struct nes_v4_quad nes_quad; + u32 crc_value; + u64 u64temp; + + nesadapter = nesdev->nesadapter; + nesqp = qh_chg->nesqp; + + /* Should we handle the bad completion */ + if (cqp_request->major_code) { + printk(KERN_ERR PFX "Invalid cqp_request major_code=0x%x\n", + cqp_request->major_code); + WARN_ON(1); + } + + switch (nesqp->pau_state) { + case PAU_DEL_QH: + /* Old hash code deleted, now set the new one */ + nesqp->pau_state = PAU_ADD_LB_QH; + new_request = nes_get_cqp_request(nesdev); + if (new_request == NULL) { + nes_debug(NES_DBG_PAU, "Failed to get a new_request.\n"); + WARN_ON(1); + return; + } + + memset(&nes_quad, 0, sizeof(nes_quad)); + nes_quad.DstIpAdrIndex = + cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); + nes_quad.SrcIpadr = cpu_to_be32(0x7f000001); + nes_quad.TcpPorts[0] = swab16(nesqp->nesqp_context->tcpPorts[1]); + nes_quad.TcpPorts[1] = swab16(nesqp->nesqp_context->tcpPorts[0]); + + /* Produce hash key */ + crc_value = get_crc_value(&nes_quad); + nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); + nes_debug(NES_DBG_PAU, "new HTE Index = 0x%08X, CRC = 0x%08X\n", + nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask); + + nesqp->hte_index &= nesadapter->hte_index_mask; + nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); + nesqp->nesqp_context->ip0 = cpu_to_le32(0x7f000001); + nesqp->nesqp_context->rcv_nxt = cpu_to_le32(nesqp->pau_rcv_nxt); + + cqp_wqe = &new_request->cqp_wqe; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, + NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH | + NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); + u64temp = (u64)nesqp->nesqp_context_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); + + nes_debug(NES_DBG_PAU, "Waiting for CQP completion for adding the quad hash.\n"); + + new_request->cqp_callback_pointer = qh_chg; + new_request->callback = 1; + new_request->cqp_callback = nes_chg_qh_handler; + atomic_set(&new_request->refcount, 1); + nes_post_cqp_request(nesdev, new_request); + break; + + case PAU_ADD_LB_QH: + /* Start processing the queued fpdu's */ + nesqp->pau_state = PAU_READY; + process_fpdus(qh_chg->nesvnic, qh_chg->nesqp); + kfree(qh_chg); + break; + } +} + +/** + * nes_change_quad_hash + */ +static int nes_change_quad_hash(struct nes_device *nesdev, + struct nes_vnic *nesvnic, struct nes_qp *nesqp) +{ + struct nes_cqp_request *cqp_request = NULL; + struct pau_qh_chg *qh_chg = NULL; + u64 u64temp; + struct nes_hw_cqp_wqe *cqp_wqe; + int ret = 0; + + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n"); + ret = -ENOMEM; + goto chg_qh_err; + } + + qh_chg = kmalloc(sizeof *qh_chg, GFP_ATOMIC); + if (qh_chg == NULL) { + nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n"); + ret = -ENOMEM; + goto chg_qh_err; + } + qh_chg->nesdev = nesdev; + qh_chg->nesvnic = nesvnic; + qh_chg->nesqp = nesqp; + nesqp->pau_state = PAU_DEL_QH; + + cqp_wqe = &cqp_request->cqp_wqe; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, + NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH | NES_CQP_QP_DEL_HTE | + NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); + u64temp = (u64)nesqp->nesqp_context_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); + + nes_debug(NES_DBG_PAU, "Waiting for CQP completion for deleting the quad hash.\n"); + + cqp_request->cqp_callback_pointer = qh_chg; + cqp_request->callback = 1; + cqp_request->cqp_callback = nes_chg_qh_handler; + atomic_set(&cqp_request->refcount, 1); + nes_post_cqp_request(nesdev, cqp_request); + + return ret; + +chg_qh_err: + kfree(qh_chg); + if (cqp_request) + nes_put_cqp_request(nesdev, cqp_request); + return ret; +} + +/** + * nes_mgt_ce_handler + * This management code deals with any packed and unaligned (pau) fpdu's + * that the hardware cannot handle. + */ +static void nes_mgt_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) +{ + struct nes_vnic_mgt *mgtvnic = container_of(cq, struct nes_vnic_mgt, mgt_cq); + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 head; + u32 cq_size; + u32 cqe_count = 0; + u32 cqe_misc; + u32 qp_id = 0; + u32 skbs_needed; + unsigned long context; + struct nes_qp *nesqp; + struct sk_buff *rx_skb; + struct nes_rskb_cb *cb; + + head = cq->cq_head; + cq_size = cq->cq_size; + + while (1) { + cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]); + if (!(cqe_misc & NES_NIC_CQE_VALID)) + break; + + nesqp = NULL; + if (cqe_misc & NES_NIC_CQE_ACCQP_VALID) { + qp_id = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_ACCQP_ID_IDX]); + qp_id &= 0x001fffff; + if (qp_id < nesadapter->max_qp) { + context = (unsigned long)nesadapter->qp_table[qp_id - NES_FIRST_QPN]; + nesqp = (struct nes_qp *)context; + } + } + + if (nesqp) { + if (nesqp->pau_mode == false) { + nesqp->pau_mode = true; /* First time for this qp */ + nesqp->pau_rcv_nxt = le32_to_cpu( + cq->cq_vbase[head].cqe_words[NES_NIC_CQE_HASH_RCVNXT]); + skb_queue_head_init(&nesqp->pau_list); + spin_lock_init(&nesqp->pau_lock); + atomic_inc(&pau_qps_created); + nes_change_quad_hash(nesdev, mgtvnic->nesvnic, nesqp); + } + + rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail]; + rx_skb->len = 0; + skb_put(rx_skb, cqe_misc & 0x0000ffff); + rx_skb->protocol = eth_type_trans(rx_skb, mgtvnic->nesvnic->netdev); + cb = (struct nes_rskb_cb *)&rx_skb->cb[0]; + pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, PCI_DMA_FROMDEVICE); + cb->busaddr = 0; + mgtvnic->mgt.rq_tail++; + mgtvnic->mgt.rq_tail &= mgtvnic->mgt.rq_size - 1; + + nes_add_ref_cm_node(nesqp->cm_node); + nes_queue_mgt_skbs(rx_skb, mgtvnic->nesvnic, nesqp); + } else { + printk(KERN_ERR PFX "Invalid QP %d for packed/unaligned handling\n", qp_id); + } + + cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0; + cqe_count++; + if (++head >= cq_size) + head = 0; + + if (cqe_count == 255) { + /* Replenish mgt CQ */ + nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (cqe_count << 16)); + nesdev->currcq_count += cqe_count; + cqe_count = 0; + } + + skbs_needed = atomic_inc_return(&mgtvnic->rx_skbs_needed); + if (skbs_needed > (mgtvnic->mgt.rq_size >> 1)) + nes_replenish_mgt_rq(mgtvnic); + } + + cq->cq_head = head; + nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | + cq->cq_number | (cqe_count << 16)); + nes_read32(nesdev->regs + NES_CQE_ALLOC); + nesdev->currcq_count += cqe_count; +} + +/** + * nes_init_mgt_qp + */ +int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic) +{ + struct nes_vnic_mgt *mgtvnic; + u32 counter; + void *vmem; + dma_addr_t pmem; + struct nes_hw_cqp_wqe *cqp_wqe; + u32 cqp_head; + unsigned long flags; + struct nes_hw_nic_qp_context *mgt_context; + u64 u64temp; + struct nes_hw_nic_rq_wqe *mgt_rqe; + struct sk_buff *skb; + u32 wqe_count; + struct nes_rskb_cb *cb; + u32 mgt_mem_size; + void *mgt_vbase; + dma_addr_t mgt_pbase; + int i; + int ret; + + /* Allocate space the all mgt QPs once */ + mgtvnic = kzalloc(NES_MGT_QP_COUNT * sizeof(struct nes_vnic_mgt), GFP_KERNEL); + if (mgtvnic == NULL) { + nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt structure\n"); + return -ENOMEM; + } + + /* Allocate fragment, RQ, and CQ; Reuse CEQ based on the PCI function */ + /* We are not sending from this NIC so sq is not allocated */ + mgt_mem_size = 256 + + (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)) + + (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_cqe)) + + sizeof(struct nes_hw_nic_qp_context); + mgt_mem_size = (mgt_mem_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); + mgt_vbase = pci_alloc_consistent(nesdev->pcidev, NES_MGT_QP_COUNT * mgt_mem_size, &mgt_pbase); + if (!mgt_vbase) { + kfree(mgtvnic); + nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt host descriptor rings\n"); + return -ENOMEM; + } + + nesvnic->mgt_mem_size = NES_MGT_QP_COUNT * mgt_mem_size; + nesvnic->mgt_vbase = mgt_vbase; + nesvnic->mgt_pbase = mgt_pbase; + + skb_queue_head_init(&nesvnic->mgt_skb_list); + init_waitqueue_head(&nesvnic->mgt_wait_queue); + nesvnic->mgt_thread = kthread_run(mgt_thread, nesvnic, "nes_mgt_thread"); + + for (i = 0; i < NES_MGT_QP_COUNT; i++) { + mgtvnic->nesvnic = nesvnic; + mgtvnic->mgt.qp_id = nesdev->mac_index + NES_MGT_QP_OFFSET + i; + memset(mgt_vbase, 0, mgt_mem_size); + nes_debug(NES_DBG_INIT, "Allocated mgt QP structures at %p (phys = %016lX), size = %u.\n", + mgt_vbase, (unsigned long)mgt_pbase, mgt_mem_size); + + vmem = (void *)(((unsigned long)mgt_vbase + (256 - 1)) & + ~(unsigned long)(256 - 1)); + pmem = (dma_addr_t)(((unsigned long long)mgt_pbase + (256 - 1)) & + ~(unsigned long long)(256 - 1)); + + spin_lock_init(&mgtvnic->mgt.rq_lock); + + /* setup the RQ */ + mgtvnic->mgt.rq_vbase = vmem; + mgtvnic->mgt.rq_pbase = pmem; + mgtvnic->mgt.rq_head = 0; + mgtvnic->mgt.rq_tail = 0; + mgtvnic->mgt.rq_size = NES_MGT_WQ_COUNT; + + /* setup the CQ */ + vmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)); + pmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)); + + mgtvnic->mgt_cq.cq_number = mgtvnic->mgt.qp_id; + mgtvnic->mgt_cq.cq_vbase = vmem; + mgtvnic->mgt_cq.cq_pbase = pmem; + mgtvnic->mgt_cq.cq_head = 0; + mgtvnic->mgt_cq.cq_size = NES_MGT_WQ_COUNT; + + mgtvnic->mgt_cq.ce_handler = nes_mgt_ce_handler; + + /* Send CreateCQ request to CQP */ + spin_lock_irqsave(&nesdev->cqp.lock, flags); + cqp_head = nesdev->cqp.sq_head; + + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( + NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | + ((u32)mgtvnic->mgt_cq.cq_size << 16)); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32( + mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16)); + u64temp = (u64)mgtvnic->mgt_cq.cq_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; + u64temp = (unsigned long)&mgtvnic->mgt_cq; + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1)); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = + cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0; + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + /* Send CreateQP request to CQP */ + mgt_context = (void *)(&mgtvnic->mgt_cq.cq_vbase[mgtvnic->mgt_cq.cq_size]); + mgt_context->context_words[NES_NIC_CTX_MISC_IDX] = + cpu_to_le32((u32)NES_MGT_CTX_SIZE | + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12)); + nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n", + nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE), + nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE)); + if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0) + mgt_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE); + + u64temp = (u64)mgtvnic->mgt.rq_pbase; + mgt_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp); + mgt_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); + u64temp = (u64)mgtvnic->mgt.rq_pbase; + mgt_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp); + mgt_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP | + NES_CQP_QP_TYPE_NIC); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(mgtvnic->mgt.qp_id); + u64temp = (u64)mgtvnic->mgt_cq.cq_pbase + + (mgtvnic->mgt_cq.cq_size * sizeof(struct nes_hw_nic_cqe)); + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + nesdev->cqp.sq_head = cqp_head; + + barrier(); + + /* Ring doorbell (2 WQEs) */ + nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id); + + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + nes_debug(NES_DBG_INIT, "Waiting for create MGT QP%u to complete.\n", + mgtvnic->mgt.qp_id); + + ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_INIT, "Create MGT QP%u completed, wait_event_timeout ret = %u.\n", + mgtvnic->mgt.qp_id, ret); + if (!ret) { + nes_debug(NES_DBG_INIT, "MGT QP%u create timeout expired\n", mgtvnic->mgt.qp_id); + if (i == 0) { + pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase, + nesvnic->mgt_pbase); + kfree(mgtvnic); + } else { + nes_destroy_mgt(nesvnic); + } + return -EIO; + } + + /* Populate the RQ */ + for (counter = 0; counter < (NES_MGT_WQ_COUNT - 1); counter++) { + skb = dev_alloc_skb(nesvnic->max_frame_size); + if (!skb) { + nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name); + return -ENOMEM; + } + + skb->dev = netdev; + + pmem = pci_map_single(nesdev->pcidev, skb->data, + nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->busaddr = pmem; + cb->maplen = nesvnic->max_frame_size; + + mgt_rqe = &mgtvnic->mgt.rq_vbase[counter]; + mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32((u32)nesvnic->max_frame_size); + mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; + mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem); + mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32)); + mgtvnic->mgt.rx_skb[counter] = skb; + } + + init_timer(&mgtvnic->rq_wqes_timer); + mgtvnic->rq_wqes_timer.function = nes_mgt_rq_wqes_timeout; + mgtvnic->rq_wqes_timer.data = (unsigned long)mgtvnic; + + wqe_count = NES_MGT_WQ_COUNT - 1; + mgtvnic->mgt.rq_head = wqe_count; + barrier(); + do { + counter = min(wqe_count, ((u32)255)); + wqe_count -= counter; + nes_write32(nesdev->regs + NES_WQE_ALLOC, (counter << 24) | mgtvnic->mgt.qp_id); + } while (wqe_count); + + nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | + mgtvnic->mgt_cq.cq_number); + nes_read32(nesdev->regs + NES_CQE_ALLOC); + + mgt_vbase += mgt_mem_size; + mgt_pbase += mgt_mem_size; + nesvnic->mgtvnic[i] = mgtvnic++; + } + return 0; +} + + +void nes_destroy_mgt(struct nes_vnic *nesvnic) +{ + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_vnic_mgt *mgtvnic; + struct nes_vnic_mgt *first_mgtvnic; + unsigned long flags; + struct nes_hw_cqp_wqe *cqp_wqe; + u32 cqp_head; + struct sk_buff *rx_skb; + int i; + int ret; + + kthread_stop(nesvnic->mgt_thread); + + /* Free remaining NIC receive buffers */ + first_mgtvnic = nesvnic->mgtvnic[0]; + for (i = 0; i < NES_MGT_QP_COUNT; i++) { + mgtvnic = nesvnic->mgtvnic[i]; + if (mgtvnic == NULL) + continue; + + while (mgtvnic->mgt.rq_head != mgtvnic->mgt.rq_tail) { + rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail]; + nes_mgt_free_skb(nesdev, rx_skb, PCI_DMA_FROMDEVICE); + mgtvnic->mgt.rq_tail++; + mgtvnic->mgt.rq_tail &= (mgtvnic->mgt.rq_size - 1); + } + + spin_lock_irqsave(&nesdev->cqp.lock, flags); + + /* Destroy NIC QP */ + cqp_head = nesdev->cqp.sq_head; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + mgtvnic->mgt.qp_id); + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + + /* Destroy NIC CQ */ + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_DESTROY_CQ | ((u32)mgtvnic->mgt_cq.cq_size << 16))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + (mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16))); + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + + nesdev->cqp.sq_head = cqp_head; + barrier(); + + /* Ring doorbell (2 WQEs) */ + nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id); + + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u," + " cqp.sq_tail=%u, cqp.sq_size=%u\n", + cqp_head, nesdev->cqp.sq_head, + nesdev->cqp.sq_tail, nesdev->cqp.sq_size); + + ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head), + NES_EVENT_TIMEOUT); + + nes_debug(NES_DBG_SHUTDOWN, "Destroy MGT QP returned, wait_event_timeout ret = %u, cqp_head=%u," + " cqp.sq_head=%u, cqp.sq_tail=%u\n", + ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail); + if (!ret) + nes_debug(NES_DBG_SHUTDOWN, "MGT QP%u destroy timeout expired\n", + mgtvnic->mgt.qp_id); + + nesvnic->mgtvnic[i] = NULL; + } + + if (nesvnic->mgt_vbase) { + pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase, + nesvnic->mgt_pbase); + nesvnic->mgt_vbase = NULL; + nesvnic->mgt_pbase = 0; + } + + kfree(first_mgtvnic); +} diff --git a/drivers/infiniband/hw/nes/nes_mgt.h b/drivers/infiniband/hw/nes/nes_mgt.h new file mode 100644 index 00000000..4f7f701c --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_mgt.h @@ -0,0 +1,97 @@ +/* +* Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved. +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#ifndef __NES_MGT_H +#define __NES_MGT_H + +#define MPA_FRAMING 6 /* length is 2 bytes, crc is 4 bytes */ + +int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic); +void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp); +void nes_destroy_mgt(struct nes_vnic *nesvnic); +void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp); + +struct nes_hw_mgt { + struct nes_hw_nic_rq_wqe *rq_vbase; /* virtual address of rq */ + dma_addr_t rq_pbase; /* PCI memory for host rings */ + struct sk_buff *rx_skb[NES_NIC_WQ_SIZE]; + u16 qp_id; + u16 sq_head; + u16 rq_head; + u16 rq_tail; + u16 rq_size; + u8 replenishing_rq; + u8 reserved; + spinlock_t rq_lock; +}; + +struct nes_vnic_mgt { + struct nes_vnic *nesvnic; + struct nes_hw_mgt mgt; + struct nes_hw_nic_cq mgt_cq; + atomic_t rx_skbs_needed; + struct timer_list rq_wqes_timer; + atomic_t rx_skb_timer_running; +}; + +#define MAX_FPDU_FRAGS 4 +struct pau_fpdu_frag { + struct sk_buff *skb; + u64 physaddr; + u32 frag_len; + bool cmplt; +}; + +struct pau_fpdu_info { + struct nes_qp *nesqp; + struct nes_cqp_request *cqp_request; + void *hdr_vbase; + dma_addr_t hdr_pbase; + int hdr_len; + u16 data_len; + u16 frag_cnt; + struct pau_fpdu_frag frags[MAX_FPDU_FRAGS]; +}; + +enum pau_qh_state { + PAU_DEL_QH, + PAU_ADD_LB_QH, + PAU_READY +}; + +struct pau_qh_chg { + struct nes_device *nesdev; + struct nes_vnic *nesvnic; + struct nes_qp *nesqp; +}; + +#endif /* __NES_MGT_H */ diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c new file mode 100644 index 00000000..f3a3ecf8 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -0,0 +1,1887 @@ +/* + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "nes.h" + +static struct nic_qp_map nic_qp_mapping_0[] = { + {16,0,0,1},{24,4,0,0},{28,8,0,0},{32,12,0,0}, + {20,2,2,1},{26,6,2,0},{30,10,2,0},{34,14,2,0}, + {18,1,1,1},{25,5,1,0},{29,9,1,0},{33,13,1,0}, + {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0} +}; + +static struct nic_qp_map nic_qp_mapping_1[] = { + {18,1,1,1},{25,5,1,0},{29,9,1,0},{33,13,1,0}, + {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0} +}; + +static struct nic_qp_map nic_qp_mapping_2[] = { + {20,2,2,1},{26,6,2,0},{30,10,2,0},{34,14,2,0} +}; + +static struct nic_qp_map nic_qp_mapping_3[] = { + {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0} +}; + +static struct nic_qp_map nic_qp_mapping_4[] = { + {28,8,0,0},{32,12,0,0} +}; + +static struct nic_qp_map nic_qp_mapping_5[] = { + {29,9,1,0},{33,13,1,0} +}; + +static struct nic_qp_map nic_qp_mapping_6[] = { + {30,10,2,0},{34,14,2,0} +}; + +static struct nic_qp_map nic_qp_mapping_7[] = { + {31,11,3,0},{35,15,3,0} +}; + +static struct nic_qp_map *nic_qp_mapping_per_function[] = { + nic_qp_mapping_0, nic_qp_mapping_1, nic_qp_mapping_2, nic_qp_mapping_3, + nic_qp_mapping_4, nic_qp_mapping_5, nic_qp_mapping_6, nic_qp_mapping_7 +}; + +static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK + | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN; +static int debug = -1; +static int nics_per_function = 1; + +/** + * nes_netdev_poll + */ +static int nes_netdev_poll(struct napi_struct *napi, int budget) +{ + struct nes_vnic *nesvnic = container_of(napi, struct nes_vnic, napi); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_hw_nic_cq *nescq = &nesvnic->nic_cq; + + nesvnic->budget = budget; + nescq->cqes_pending = 0; + nescq->rx_cqes_completed = 0; + nescq->cqe_allocs_pending = 0; + nescq->rx_pkts_indicated = 0; + + nes_nic_ce_handler(nesdev, nescq); + + if (nescq->cqes_pending == 0) { + napi_complete(napi); + /* clear out completed cqes and arm */ + nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | + nescq->cq_number | (nescq->cqe_allocs_pending << 16)); + nes_read32(nesdev->regs+NES_CQE_ALLOC); + } else { + /* clear out completed cqes but don't arm */ + nes_write32(nesdev->regs+NES_CQE_ALLOC, + nescq->cq_number | (nescq->cqe_allocs_pending << 16)); + nes_debug(NES_DBG_NETDEV, "%s: exiting with work pending\n", + nesvnic->netdev->name); + } + return nescq->rx_pkts_indicated; +} + + +/** + * nes_netdev_open - Activate the network interface; ifconfig + * ethx up. + */ +static int nes_netdev_open(struct net_device *netdev) +{ + u32 macaddr_low; + u16 macaddr_high; + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + int ret; + int i; + struct nes_vnic *first_nesvnic = NULL; + u32 nic_active_bit; + u32 nic_active; + struct list_head *list_pos, *list_temp; + unsigned long flags; + + assert(nesdev != NULL); + + if (nesvnic->netdev_open == 1) + return 0; + + if (netif_msg_ifup(nesvnic)) + printk(KERN_INFO PFX "%s: enabling interface\n", netdev->name); + + ret = nes_init_nic_qp(nesdev, netdev); + if (ret) { + return ret; + } + + netif_carrier_off(netdev); + netif_stop_queue(netdev); + + if ((!nesvnic->of_device_registered) && (nesvnic->rdma_enabled)) { + nesvnic->nesibdev = nes_init_ofa_device(netdev); + if (nesvnic->nesibdev == NULL) { + printk(KERN_ERR PFX "%s: nesvnic->nesibdev alloc failed", netdev->name); + } else { + nesvnic->nesibdev->nesvnic = nesvnic; + ret = nes_register_ofa_device(nesvnic->nesibdev); + if (ret) { + printk(KERN_ERR PFX "%s: Unable to register RDMA device, ret = %d\n", + netdev->name, ret); + } + } + } + /* Set packet filters */ + nic_active_bit = 1 << nesvnic->nic_index; + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_ACTIVE); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_ACTIVE, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active); + + macaddr_high = ((u16)netdev->dev_addr[0]) << 8; + macaddr_high += (u16)netdev->dev_addr[1]; + + macaddr_low = ((u32)netdev->dev_addr[2]) << 24; + macaddr_low += ((u32)netdev->dev_addr[3]) << 16; + macaddr_low += ((u32)netdev->dev_addr[4]) << 8; + macaddr_low += (u32)netdev->dev_addr[5]; + + /* Program the various MAC regs */ + for (i = 0; i < NES_MAX_PORT_COUNT; i++) { + if (nesvnic->qp_nic_index[i] == 0xf) { + break; + } + nes_debug(NES_DBG_NETDEV, "i=%d, perfect filter table index= %d, PERF FILTER LOW" + " (Addr:%08X) = %08X, HIGH = %08X.\n", + i, nesvnic->qp_nic_index[i], + NES_IDX_PERFECT_FILTER_LOW+ + (nesvnic->qp_nic_index[i] * 8), + macaddr_low, + (u32)macaddr_high | NES_MAC_ADDR_VALID | + ((((u32)nesvnic->nic_index) << 16))); + nes_write_indexed(nesdev, + NES_IDX_PERFECT_FILTER_LOW + (nesvnic->qp_nic_index[i] * 8), + macaddr_low); + nes_write_indexed(nesdev, + NES_IDX_PERFECT_FILTER_HIGH + (nesvnic->qp_nic_index[i] * 8), + (u32)macaddr_high | NES_MAC_ADDR_VALID | + ((((u32)nesvnic->nic_index) << 16))); + } + + + nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | + nesvnic->nic_cq.cq_number); + nes_read32(nesdev->regs+NES_CQE_ALLOC); + list_for_each_safe(list_pos, list_temp, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]) { + first_nesvnic = container_of(list_pos, struct nes_vnic, list); + if (first_nesvnic->netdev_open == 1) + break; + } + if (first_nesvnic->netdev_open == 0) { + nes_debug(NES_DBG_INIT, "Setting up MAC interrupt mask.\n"); + nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK + (0x200 * nesdev->mac_index), + ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | + NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); + first_nesvnic = nesvnic; + } + + if (first_nesvnic->linkup) { + /* Enable network packets */ + nesvnic->linkup = 1; + netif_start_queue(netdev); + netif_carrier_on(netdev); + } + + spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); + if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_SFP_D) { + if (nesdev->link_recheck) + cancel_delayed_work(&nesdev->work); + nesdev->link_recheck = 1; + schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY); + } + spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); + + spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags); + if (nesvnic->of_device_registered) { + nesdev->nesadapter->send_term_ok = 1; + if (nesvnic->linkup == 1) { + if (nesdev->iw_status == 0) { + nesdev->iw_status = 1; + nes_port_ibevent(nesvnic); + } + } else { + nesdev->iw_status = 0; + } + } + spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags); + + napi_enable(&nesvnic->napi); + nesvnic->netdev_open = 1; + + return 0; +} + + +/** + * nes_netdev_stop + */ +static int nes_netdev_stop(struct net_device *netdev) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + u32 nic_active_mask; + u32 nic_active; + struct nes_vnic *first_nesvnic = NULL; + struct list_head *list_pos, *list_temp; + unsigned long flags; + + nes_debug(NES_DBG_SHUTDOWN, "nesvnic=%p, nesdev=%p, netdev=%p %s\n", + nesvnic, nesdev, netdev, netdev->name); + if (nesvnic->netdev_open == 0) + return 0; + + if (netif_msg_ifdown(nesvnic)) + printk(KERN_INFO PFX "%s: disabling interface\n", netdev->name); + netif_carrier_off(netdev); + + /* Disable network packets */ + napi_disable(&nesvnic->napi); + netif_stop_queue(netdev); + list_for_each_safe(list_pos, list_temp, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]) { + first_nesvnic = container_of(list_pos, struct nes_vnic, list); + if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic)) + break; + } + + if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic) && + (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) != + PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) { + nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+ + (0x200*nesdev->mac_index), 0xffffffff); + nes_write_indexed(first_nesvnic->nesdev, + NES_IDX_MAC_INT_MASK+ + (0x200*first_nesvnic->nesdev->mac_index), + ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | + NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); + } else { + nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff); + } + + nic_active_mask = ~((u32)(1 << nesvnic->nic_index)); + nes_write_indexed(nesdev, NES_IDX_PERFECT_FILTER_HIGH+ + (nesvnic->perfect_filter_index*8), 0); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_ACTIVE); + nic_active &= nic_active_mask; + nes_write_indexed(nesdev, NES_IDX_NIC_ACTIVE, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); + nic_active &= nic_active_mask; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE); + nic_active &= nic_active_mask; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active &= nic_active_mask; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON); + nic_active &= nic_active_mask; + nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active); + + spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags); + if (nesvnic->of_device_registered) { + nesdev->nesadapter->send_term_ok = 0; + nesdev->iw_status = 0; + if (nesvnic->linkup == 1) + nes_port_ibevent(nesvnic); + } + del_timer_sync(&nesvnic->event_timer); + nesvnic->event_timer.function = NULL; + spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags); + + nes_destroy_nic_qp(nesvnic); + + nesvnic->netdev_open = 0; + + return 0; +} + + +/** + * nes_nic_send + */ +static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_hw_nic *nesnic = &nesvnic->nic; + struct nes_hw_nic_sq_wqe *nic_sqe; + struct tcphdr *tcph; + __le16 *wqe_fragment_length; + u32 wqe_misc; + u16 wqe_fragment_index = 1; /* first fragment (0) is used by copy buffer */ + u16 skb_fragment_index; + dma_addr_t bus_address; + + nic_sqe = &nesnic->sq_vbase[nesnic->sq_head]; + wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; + + /* setup the VLAN tag if present */ + if (vlan_tx_tag_present(skb)) { + nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n", + netdev->name, vlan_tx_tag_get(skb)); + wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE; + wqe_fragment_length[0] = (__force __le16) vlan_tx_tag_get(skb); + } else + wqe_misc = 0; + + /* bump past the vlan tag */ + wqe_fragment_length++; + /* wqe_fragment_address = (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; */ + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + tcph = tcp_hdr(skb); + if (1) { + if (skb_is_gso(skb)) { + /* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... seg size = %u\n", + netdev->name, skb_is_gso(skb)); */ + wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE | + NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb); + set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX, + ((u32)tcph->doff) | + (((u32)(((unsigned char *)tcph) - skb->data)) << 4)); + } else { + wqe_misc |= NES_NIC_SQ_WQE_COMPLETION; + } + } + } else { /* CHECKSUM_HW */ + wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM | NES_NIC_SQ_WQE_COMPLETION; + } + + set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX, + skb->len); + memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer, + skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE), skb_headlen(skb))); + wqe_fragment_length[0] = cpu_to_le16(min(((unsigned int)NES_FIRST_FRAG_SIZE), + skb_headlen(skb))); + wqe_fragment_length[1] = 0; + if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) { + if ((skb_shinfo(skb)->nr_frags + 1) > 4) { + nes_debug(NES_DBG_NIC_TX, "%s: Packet with %u fragments not sent, skb_headlen=%u\n", + netdev->name, skb_shinfo(skb)->nr_frags + 2, skb_headlen(skb)); + kfree_skb(skb); + nesvnic->tx_sw_dropped++; + return NETDEV_TX_LOCKED; + } + set_bit(nesnic->sq_head, nesnic->first_frag_overflow); + bus_address = pci_map_single(nesdev->pcidev, skb->data + NES_FIRST_FRAG_SIZE, + skb_headlen(skb) - NES_FIRST_FRAG_SIZE, PCI_DMA_TODEVICE); + wqe_fragment_length[wqe_fragment_index++] = + cpu_to_le16(skb_headlen(skb) - NES_FIRST_FRAG_SIZE); + wqe_fragment_length[wqe_fragment_index] = 0; + set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX, + ((u64)(bus_address))); + nesnic->tx_skb[nesnic->sq_head] = skb; + } + + if (skb_headlen(skb) == skb->len) { + if (skb_headlen(skb) <= NES_FIRST_FRAG_SIZE) { + nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_2_1_IDX] = 0; + nesnic->tx_skb[nesnic->sq_head] = skb; + } + } else { + /* Deal with Fragments */ + nesnic->tx_skb[nesnic->sq_head] = skb; + for (skb_fragment_index = 0; skb_fragment_index < skb_shinfo(skb)->nr_frags; + skb_fragment_index++) { + skb_frag_t *frag = + &skb_shinfo(skb)->frags[skb_fragment_index]; + bus_address = skb_frag_dma_map(&nesdev->pcidev->dev, + frag, 0, skb_frag_size(frag), + DMA_TO_DEVICE); + wqe_fragment_length[wqe_fragment_index] = + cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[skb_fragment_index])); + set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index), + bus_address); + wqe_fragment_index++; + if (wqe_fragment_index < 5) + wqe_fragment_length[wqe_fragment_index] = 0; + } + } + + set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_MISC_IDX, wqe_misc); + nesnic->sq_head++; + nesnic->sq_head &= nesnic->sq_size - 1; + + return NETDEV_TX_OK; +} + + +/** + * nes_netdev_start_xmit + */ +static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_hw_nic *nesnic = &nesvnic->nic; + struct nes_hw_nic_sq_wqe *nic_sqe; + struct tcphdr *tcph; + /* struct udphdr *udph; */ +#define NES_MAX_TSO_FRAGS MAX_SKB_FRAGS + /* 64K segment plus overflow on each side */ + dma_addr_t tso_bus_address[NES_MAX_TSO_FRAGS]; + dma_addr_t bus_address; + u32 tso_frag_index; + u32 tso_frag_count; + u32 tso_wqe_length; + u32 curr_tcp_seq; + u32 wqe_count=1; + u32 send_rc; + struct iphdr *iph; + __le16 *wqe_fragment_length; + u32 nr_frags; + u32 original_first_length; + /* u64 *wqe_fragment_address; */ + /* first fragment (0) is used by copy buffer */ + u16 wqe_fragment_index=1; + u16 hoffset; + u16 nhoffset; + u16 wqes_needed; + u16 wqes_available; + u32 wqe_misc; + + /* + * nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," + * " (%u frags), tso_size=%u\n", + * netdev->name, skb->len, skb_headlen(skb), + * skb_shinfo(skb)->nr_frags, skb_is_gso(skb)); + */ + + if (!netif_carrier_ok(netdev)) + return NETDEV_TX_OK; + + if (netif_queue_stopped(netdev)) + return NETDEV_TX_BUSY; + + /* Check if SQ is full */ + if ((((nesnic->sq_tail+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) == 1) { + if (!netif_queue_stopped(netdev)) { + netif_stop_queue(netdev); + barrier(); + if ((((((volatile u16)nesnic->sq_tail)+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) != 1) { + netif_start_queue(netdev); + goto sq_no_longer_full; + } + } + nesvnic->sq_full++; + return NETDEV_TX_BUSY; + } + +sq_no_longer_full: + nr_frags = skb_shinfo(skb)->nr_frags; + if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) { + nr_frags++; + } + /* Check if too many fragments */ + if (unlikely((nr_frags > 4))) { + if (skb_is_gso(skb)) { + nesvnic->segmented_tso_requests++; + nesvnic->tso_requests++; + /* Basically 4 fragments available per WQE with extended fragments */ + wqes_needed = nr_frags >> 2; + wqes_needed += (nr_frags&3)?1:0; + wqes_available = (((nesnic->sq_tail+nesnic->sq_size)-nesnic->sq_head) - 1) & + (nesnic->sq_size - 1); + + if (unlikely(wqes_needed > wqes_available)) { + if (!netif_queue_stopped(netdev)) { + netif_stop_queue(netdev); + barrier(); + wqes_available = (((((volatile u16)nesnic->sq_tail)+nesnic->sq_size)-nesnic->sq_head) - 1) & + (nesnic->sq_size - 1); + if (wqes_needed <= wqes_available) { + netif_start_queue(netdev); + goto tso_sq_no_longer_full; + } + } + nesvnic->sq_full++; + nes_debug(NES_DBG_NIC_TX, "%s: HNIC SQ full- TSO request has too many frags!\n", + netdev->name); + return NETDEV_TX_BUSY; + } +tso_sq_no_longer_full: + /* Map all the buffers */ + for (tso_frag_count=0; tso_frag_count < skb_shinfo(skb)->nr_frags; + tso_frag_count++) { + skb_frag_t *frag = + &skb_shinfo(skb)->frags[tso_frag_count]; + tso_bus_address[tso_frag_count] = + skb_frag_dma_map(&nesdev->pcidev->dev, + frag, 0, skb_frag_size(frag), + DMA_TO_DEVICE); + } + + tso_frag_index = 0; + curr_tcp_seq = ntohl(tcp_hdr(skb)->seq); + hoffset = skb_transport_header(skb) - skb->data; + nhoffset = skb_network_header(skb) - skb->data; + original_first_length = hoffset + ((((struct tcphdr *)skb_transport_header(skb))->doff)<<2); + + for (wqe_count=0; wqe_count<((u32)wqes_needed); wqe_count++) { + tso_wqe_length = 0; + nic_sqe = &nesnic->sq_vbase[nesnic->sq_head]; + wqe_fragment_length = + (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; + /* setup the VLAN tag if present */ + if (vlan_tx_tag_present(skb)) { + nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n", + netdev->name, vlan_tx_tag_get(skb) ); + wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE; + wqe_fragment_length[0] = (__force __le16) vlan_tx_tag_get(skb); + } else + wqe_misc = 0; + + /* bump past the vlan tag */ + wqe_fragment_length++; + + /* Assumes header totally fits in allocated buffer and is in first fragment */ + if (original_first_length > NES_FIRST_FRAG_SIZE) { + nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, headlen=%u, FIRST_FRAG_SIZE=%u\n", + original_first_length, NES_FIRST_FRAG_SIZE); + nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," + " (%u frags), tso_size=%u\n", + netdev->name, + skb->len, skb_headlen(skb), + skb_shinfo(skb)->nr_frags, skb_is_gso(skb)); + } + memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer, + skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE), + original_first_length)); + iph = (struct iphdr *) + (&nesnic->first_frag_vbase[nesnic->sq_head].buffer[nhoffset]); + tcph = (struct tcphdr *) + (&nesnic->first_frag_vbase[nesnic->sq_head].buffer[hoffset]); + if ((wqe_count+1)!=(u32)wqes_needed) { + tcph->fin = 0; + tcph->psh = 0; + tcph->rst = 0; + tcph->urg = 0; + } + if (wqe_count) { + tcph->syn = 0; + } + tcph->seq = htonl(curr_tcp_seq); + wqe_fragment_length[0] = cpu_to_le16(min(((unsigned int)NES_FIRST_FRAG_SIZE), + original_first_length)); + + wqe_fragment_index = 1; + if ((wqe_count==0) && (skb_headlen(skb) > original_first_length)) { + set_bit(nesnic->sq_head, nesnic->first_frag_overflow); + bus_address = pci_map_single(nesdev->pcidev, skb->data + original_first_length, + skb_headlen(skb) - original_first_length, PCI_DMA_TODEVICE); + wqe_fragment_length[wqe_fragment_index++] = + cpu_to_le16(skb_headlen(skb) - original_first_length); + wqe_fragment_length[wqe_fragment_index] = 0; + set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX, + bus_address); + tso_wqe_length += skb_headlen(skb) - + original_first_length; + } + while (wqe_fragment_index < 5) { + wqe_fragment_length[wqe_fragment_index] = + cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index])); + set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index), + (u64)tso_bus_address[tso_frag_index]); + wqe_fragment_index++; + tso_wqe_length += skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index++]); + if (wqe_fragment_index < 5) + wqe_fragment_length[wqe_fragment_index] = 0; + if (tso_frag_index == tso_frag_count) + break; + } + if ((wqe_count+1) == (u32)wqes_needed) { + nesnic->tx_skb[nesnic->sq_head] = skb; + } else { + nesnic->tx_skb[nesnic->sq_head] = NULL; + } + wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb); + if ((tso_wqe_length + original_first_length) > skb_is_gso(skb)) { + wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE; + } else { + iph->tot_len = htons(tso_wqe_length + original_first_length - nhoffset); + } + + set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_MISC_IDX, + wqe_misc); + set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX, + ((u32)tcph->doff) | (((u32)hoffset) << 4)); + + set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX, + tso_wqe_length + original_first_length); + curr_tcp_seq += tso_wqe_length; + nesnic->sq_head++; + nesnic->sq_head &= nesnic->sq_size-1; + } + } else { + nesvnic->linearized_skbs++; + hoffset = skb_transport_header(skb) - skb->data; + nhoffset = skb_network_header(skb) - skb->data; + skb_linearize(skb); + skb_set_transport_header(skb, hoffset); + skb_set_network_header(skb, nhoffset); + send_rc = nes_nic_send(skb, netdev); + if (send_rc != NETDEV_TX_OK) + return NETDEV_TX_OK; + } + } else { + send_rc = nes_nic_send(skb, netdev); + if (send_rc != NETDEV_TX_OK) + return NETDEV_TX_OK; + } + + barrier(); + + if (wqe_count) + nes_write32(nesdev->regs+NES_WQE_ALLOC, + (wqe_count << 24) | (1 << 23) | nesvnic->nic.qp_id); + + netdev->trans_start = jiffies; + + return NETDEV_TX_OK; +} + + +/** + * nes_netdev_get_stats + */ +static struct net_device_stats *nes_netdev_get_stats(struct net_device *netdev) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + u64 u64temp; + u32 u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_DISCARD + (nesvnic->nic_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->endnode_nstat_rx_discard += u32temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO + (nesvnic->nic_index*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI + (nesvnic->nic_index*0x200))) << 32; + + nesvnic->endnode_nstat_rx_octets += u64temp; + nesvnic->netstats.rx_bytes += u64temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO + (nesvnic->nic_index*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI + (nesvnic->nic_index*0x200))) << 32; + + nesvnic->endnode_nstat_rx_frames += u64temp; + nesvnic->netstats.rx_packets += u64temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO + (nesvnic->nic_index*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI + (nesvnic->nic_index*0x200))) << 32; + + nesvnic->endnode_nstat_tx_octets += u64temp; + nesvnic->netstats.tx_bytes += u64temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO + (nesvnic->nic_index*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI + (nesvnic->nic_index*0x200))) << 32; + + nesvnic->endnode_nstat_tx_frames += u64temp; + nesvnic->netstats.tx_packets += u64temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_SHORT_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_short_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_OVERSIZED_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_oversized_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_JABBER_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_jabber_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_symbol_err_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_LENGTH_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_length_errors += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_CRC_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_crc_errors += u32temp; + nesvnic->netstats.rx_crc_errors += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_TX_ERRORS + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->nesdev->mac_tx_errors += u32temp; + nesvnic->netstats.tx_errors += u32temp; + + return &nesvnic->netstats; +} + + +/** + * nes_netdev_tx_timeout + */ +static void nes_netdev_tx_timeout(struct net_device *netdev) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + + if (netif_msg_timer(nesvnic)) + nes_debug(NES_DBG_NIC_TX, "%s: tx timeout\n", netdev->name); +} + + +/** + * nes_netdev_set_mac_address + */ +static int nes_netdev_set_mac_address(struct net_device *netdev, void *p) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct sockaddr *mac_addr = p; + int i; + u32 macaddr_low; + u16 macaddr_high; + + if (!is_valid_ether_addr(mac_addr->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len); + printk(PFX "%s: Address length = %d, Address = %pM\n", + __func__, netdev->addr_len, mac_addr->sa_data); + macaddr_high = ((u16)netdev->dev_addr[0]) << 8; + macaddr_high += (u16)netdev->dev_addr[1]; + macaddr_low = ((u32)netdev->dev_addr[2]) << 24; + macaddr_low += ((u32)netdev->dev_addr[3]) << 16; + macaddr_low += ((u32)netdev->dev_addr[4]) << 8; + macaddr_low += (u32)netdev->dev_addr[5]; + + for (i = 0; i < NES_MAX_PORT_COUNT; i++) { + if (nesvnic->qp_nic_index[i] == 0xf) { + break; + } + nes_write_indexed(nesdev, + NES_IDX_PERFECT_FILTER_LOW + (nesvnic->qp_nic_index[i] * 8), + macaddr_low); + nes_write_indexed(nesdev, + NES_IDX_PERFECT_FILTER_HIGH + (nesvnic->qp_nic_index[i] * 8), + (u32)macaddr_high | NES_MAC_ADDR_VALID | + ((((u32)nesvnic->nic_index) << 16))); + } + return 0; +} + + +static void set_allmulti(struct nes_device *nesdev, u32 nic_active_bit) +{ + u32 nic_active; + + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active &= ~nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); +} + +#define get_addr(addrs, index) ((addrs) + (index) * ETH_ALEN) + +/** + * nes_netdev_set_multicast_list + */ +static void nes_netdev_set_multicast_list(struct net_device *netdev) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; + u32 nic_active_bit; + u32 nic_active; + u32 perfect_filter_register_address; + u32 macaddr_low; + u16 macaddr_high; + u8 mc_all_on = 0; + u8 mc_index; + int mc_nic_index = -1; + u8 pft_entries_preallocated = max(nesadapter->adapter_fcn_count * + nics_per_function, 4); + u8 max_pft_entries_avaiable = NES_PFT_SIZE - pft_entries_preallocated; + unsigned long flags; + int mc_count = netdev_mc_count(netdev); + + spin_lock_irqsave(&nesadapter->resource_lock, flags); + nic_active_bit = 1 << nesvnic->nic_index; + + if (netdev->flags & IFF_PROMISC) { + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); + mc_all_on = 1; + } else if ((netdev->flags & IFF_ALLMULTI) || + (nesvnic->nic_index > 3)) { + set_allmulti(nesdev, nic_active_bit); + mc_all_on = 1; + } else { + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); + nic_active &= ~nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active &= ~nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); + } + + nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscuous = %d, All Multicast = %d.\n", + mc_count, !!(netdev->flags & IFF_PROMISC), + !!(netdev->flags & IFF_ALLMULTI)); + if (!mc_all_on) { + char *addrs; + int i; + struct netdev_hw_addr *ha; + + addrs = kmalloc(ETH_ALEN * mc_count, GFP_ATOMIC); + if (!addrs) { + set_allmulti(nesdev, nic_active_bit); + goto unlock; + } + i = 0; + netdev_for_each_mc_addr(ha, netdev) + memcpy(get_addr(addrs, i++), ha->addr, ETH_ALEN); + + perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW + + pft_entries_preallocated * 0x8; + for (i = 0, mc_index = 0; mc_index < max_pft_entries_avaiable; + mc_index++) { + while (i < mc_count && nesvnic->mcrq_mcast_filter && + ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic, + get_addr(addrs, i++))) == 0)); + if (mc_nic_index < 0) + mc_nic_index = nesvnic->nic_index; + while (nesadapter->pft_mcast_map[mc_index] < 16 && + nesadapter->pft_mcast_map[mc_index] != + nesvnic->nic_index && + mc_index < max_pft_entries_avaiable) { + nes_debug(NES_DBG_NIC_RX, + "mc_index=%d skipping nic_index=%d, " + "used for=%d \n", mc_index, + nesvnic->nic_index, + nesadapter->pft_mcast_map[mc_index]); + mc_index++; + } + if (mc_index >= max_pft_entries_avaiable) + break; + if (i < mc_count) { + char *addr = get_addr(addrs, i++); + + nes_debug(NES_DBG_NIC_RX, "Assigning MC Address %pM to register 0x%04X nic_idx=%d\n", + addr, + perfect_filter_register_address+(mc_index * 8), + mc_nic_index); + macaddr_high = ((u16) addr[0]) << 8; + macaddr_high += (u16) addr[1]; + macaddr_low = ((u32) addr[2]) << 24; + macaddr_low += ((u32) addr[3]) << 16; + macaddr_low += ((u32) addr[4]) << 8; + macaddr_low += (u32) addr[5]; + nes_write_indexed(nesdev, + perfect_filter_register_address+(mc_index * 8), + macaddr_low); + nes_write_indexed(nesdev, + perfect_filter_register_address+4+(mc_index * 8), + (u32)macaddr_high | NES_MAC_ADDR_VALID | + ((((u32)(1<pft_mcast_map[mc_index] = + nesvnic->nic_index; + } else { + nes_debug(NES_DBG_NIC_RX, "Clearing MC Address at register 0x%04X\n", + perfect_filter_register_address+(mc_index * 8)); + nes_write_indexed(nesdev, + perfect_filter_register_address+4+(mc_index * 8), + 0); + nesadapter->pft_mcast_map[mc_index] = 255; + } + } + kfree(addrs); + /* PFT is not large enough */ + if (i < mc_count) + set_allmulti(nesdev, nic_active_bit); + } + +unlock: + spin_unlock_irqrestore(&nesadapter->resource_lock, flags); +} + + +/** + * nes_netdev_change_mtu + */ +static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + int ret = 0; + u8 jumbomode = 0; + u32 nic_active; + u32 nic_active_bit; + u32 uc_all_active; + u32 mc_all_active; + + if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu)) + return -EINVAL; + + netdev->mtu = new_mtu; + nesvnic->max_frame_size = new_mtu + VLAN_ETH_HLEN; + + if (netdev->mtu > 1500) { + jumbomode=1; + } + nes_nic_init_timer_defaults(nesdev, jumbomode); + + if (netif_running(netdev)) { + nic_active_bit = 1 << nesvnic->nic_index; + mc_all_active = nes_read_indexed(nesdev, + NES_IDX_NIC_MULTICAST_ALL) & nic_active_bit; + uc_all_active = nes_read_indexed(nesdev, + NES_IDX_NIC_UNICAST_ALL) & nic_active_bit; + + nes_netdev_stop(netdev); + nes_netdev_open(netdev); + + nic_active = nes_read_indexed(nesdev, + NES_IDX_NIC_MULTICAST_ALL); + nic_active |= mc_all_active; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, + nic_active); + + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active |= uc_all_active; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); + } + + return ret; +} + + +static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = { + "Link Change Interrupts", + "Linearized SKBs", + "T/GSO Requests", + "Pause Frames Sent", + "Pause Frames Received", + "Internal Routing Errors", + "SQ SW Dropped SKBs", + "SQ Full", + "Segmented TSO Requests", + "Rx Symbol Errors", + "Rx Jabber Errors", + "Rx Oversized Frames", + "Rx Short Frames", + "Rx Length Errors", + "Rx CRC Errors", + "Rx Port Discard", + "Endnode Rx Discards", + "Endnode Rx Octets", + "Endnode Rx Frames", + "Endnode Tx Octets", + "Endnode Tx Frames", + "Tx Errors", + "mh detected", + "mh pauses", + "Retransmission Count", + "CM Connects", + "CM Accepts", + "Disconnects", + "Connected Events", + "Connect Requests", + "CM Rejects", + "ModifyQP Timeouts", + "CreateQPs", + "SW DestroyQPs", + "DestroyQPs", + "CM Closes", + "CM Packets Sent", + "CM Packets Bounced", + "CM Packets Created", + "CM Packets Rcvd", + "CM Packets Dropped", + "CM Packets Retrans", + "CM Listens Created", + "CM Listens Destroyed", + "CM Backlog Drops", + "CM Loopbacks", + "CM Nodes Created", + "CM Nodes Destroyed", + "CM Accel Drops", + "CM Resets Received", + "Free 4Kpbls", + "Free 256pbls", + "Timer Inits", + "LRO aggregated", + "LRO flushed", + "LRO no_desc", + "PAU CreateQPs", + "PAU DestroyQPs", +}; +#define NES_ETHTOOL_STAT_COUNT ARRAY_SIZE(nes_ethtool_stringset) + + +/** + * nes_netdev_get_sset_count + */ +static int nes_netdev_get_sset_count(struct net_device *netdev, int stringset) +{ + if (stringset == ETH_SS_STATS) + return NES_ETHTOOL_STAT_COUNT; + else + return -EINVAL; +} + + +/** + * nes_netdev_get_strings + */ +static void nes_netdev_get_strings(struct net_device *netdev, u32 stringset, + u8 *ethtool_strings) +{ + if (stringset == ETH_SS_STATS) + memcpy(ethtool_strings, + &nes_ethtool_stringset, + sizeof(nes_ethtool_stringset)); +} + + +/** + * nes_netdev_get_ethtool_stats + */ + +static void nes_netdev_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *target_ethtool_stats, u64 *target_stat_values) +{ + u64 u64temp; + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 nic_count; + u32 u32temp; + u32 index = 0; + + target_ethtool_stats->n_stats = NES_ETHTOOL_STAT_COUNT; + target_stat_values[index] = nesvnic->nesdev->link_status_interrupts; + target_stat_values[++index] = nesvnic->linearized_skbs; + target_stat_values[++index] = nesvnic->tso_requests; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_TX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->nesdev->mac_pause_frames_sent += u32temp; + target_stat_values[++index] = nesvnic->nesdev->mac_pause_frames_sent; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->nesdev->mac_pause_frames_received += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_PORT_RX_DISCARDS + (nesvnic->nesdev->mac_index*0x40)); + nesvnic->nesdev->port_rx_discards += u32temp; + nesvnic->netstats.rx_dropped += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_PORT_TX_DISCARDS + (nesvnic->nesdev->mac_index*0x40)); + nesvnic->nesdev->port_tx_discards += u32temp; + nesvnic->netstats.tx_dropped += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_SHORT_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_short_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_OVERSIZED_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_oversized_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_JABBER_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_jabber_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_symbol_err_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_LENGTH_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_length_errors += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_CRC_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_crc_errors += u32temp; + nesvnic->netstats.rx_crc_errors += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_TX_ERRORS + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->nesdev->mac_tx_errors += u32temp; + nesvnic->netstats.tx_errors += u32temp; + + for (nic_count = 0; nic_count < NES_MAX_PORT_COUNT; nic_count++) { + if (nesvnic->qp_nic_index[nic_count] == 0xf) + break; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_DISCARD + + (nesvnic->qp_nic_index[nic_count]*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->endnode_nstat_rx_discard += u32temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO + + (nesvnic->qp_nic_index[nic_count]*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI + + (nesvnic->qp_nic_index[nic_count]*0x200))) << 32; + + nesvnic->endnode_nstat_rx_octets += u64temp; + nesvnic->netstats.rx_bytes += u64temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO + + (nesvnic->qp_nic_index[nic_count]*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI + + (nesvnic->qp_nic_index[nic_count]*0x200))) << 32; + + nesvnic->endnode_nstat_rx_frames += u64temp; + nesvnic->netstats.rx_packets += u64temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO + + (nesvnic->qp_nic_index[nic_count]*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI + + (nesvnic->qp_nic_index[nic_count]*0x200))) << 32; + + nesvnic->endnode_nstat_tx_octets += u64temp; + nesvnic->netstats.tx_bytes += u64temp; + + u64temp = (u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO + + (nesvnic->qp_nic_index[nic_count]*0x200)); + u64temp += ((u64)nes_read_indexed(nesdev, + NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI + + (nesvnic->qp_nic_index[nic_count]*0x200))) << 32; + + nesvnic->endnode_nstat_tx_frames += u64temp; + nesvnic->netstats.tx_packets += u64temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_IPV4_TCP_REXMITS + (nesvnic->qp_nic_index[nic_count]*0x200)); + nesvnic->endnode_ipv4_tcp_retransmits += u32temp; + } + + target_stat_values[++index] = nesvnic->nesdev->mac_pause_frames_received; + target_stat_values[++index] = nesdev->nesadapter->nic_rx_eth_route_err; + target_stat_values[++index] = nesvnic->tx_sw_dropped; + target_stat_values[++index] = nesvnic->sq_full; + target_stat_values[++index] = nesvnic->segmented_tso_requests; + target_stat_values[++index] = nesvnic->nesdev->mac_rx_symbol_err_frames; + target_stat_values[++index] = nesvnic->nesdev->mac_rx_jabber_frames; + target_stat_values[++index] = nesvnic->nesdev->mac_rx_oversized_frames; + target_stat_values[++index] = nesvnic->nesdev->mac_rx_short_frames; + target_stat_values[++index] = nesvnic->netstats.rx_length_errors; + target_stat_values[++index] = nesvnic->nesdev->mac_rx_crc_errors; + target_stat_values[++index] = nesvnic->nesdev->port_rx_discards; + target_stat_values[++index] = nesvnic->endnode_nstat_rx_discard; + target_stat_values[++index] = nesvnic->endnode_nstat_rx_octets; + target_stat_values[++index] = nesvnic->endnode_nstat_rx_frames; + target_stat_values[++index] = nesvnic->endnode_nstat_tx_octets; + target_stat_values[++index] = nesvnic->endnode_nstat_tx_frames; + target_stat_values[++index] = nesvnic->nesdev->mac_tx_errors; + target_stat_values[++index] = mh_detected; + target_stat_values[++index] = mh_pauses_sent; + target_stat_values[++index] = nesvnic->endnode_ipv4_tcp_retransmits; + target_stat_values[++index] = atomic_read(&cm_connects); + target_stat_values[++index] = atomic_read(&cm_accepts); + target_stat_values[++index] = atomic_read(&cm_disconnects); + target_stat_values[++index] = atomic_read(&cm_connecteds); + target_stat_values[++index] = atomic_read(&cm_connect_reqs); + target_stat_values[++index] = atomic_read(&cm_rejects); + target_stat_values[++index] = atomic_read(&mod_qp_timouts); + target_stat_values[++index] = atomic_read(&qps_created); + target_stat_values[++index] = atomic_read(&sw_qps_destroyed); + target_stat_values[++index] = atomic_read(&qps_destroyed); + target_stat_values[++index] = atomic_read(&cm_closes); + target_stat_values[++index] = cm_packets_sent; + target_stat_values[++index] = cm_packets_bounced; + target_stat_values[++index] = cm_packets_created; + target_stat_values[++index] = cm_packets_received; + target_stat_values[++index] = cm_packets_dropped; + target_stat_values[++index] = cm_packets_retrans; + target_stat_values[++index] = atomic_read(&cm_listens_created); + target_stat_values[++index] = atomic_read(&cm_listens_destroyed); + target_stat_values[++index] = cm_backlog_drops; + target_stat_values[++index] = atomic_read(&cm_loopbacks); + target_stat_values[++index] = atomic_read(&cm_nodes_created); + target_stat_values[++index] = atomic_read(&cm_nodes_destroyed); + target_stat_values[++index] = atomic_read(&cm_accel_dropped_pkts); + target_stat_values[++index] = atomic_read(&cm_resets_recvd); + target_stat_values[++index] = nesadapter->free_4kpbl; + target_stat_values[++index] = nesadapter->free_256pbl; + target_stat_values[++index] = int_mod_timer_init; + target_stat_values[++index] = nesvnic->lro_mgr.stats.aggregated; + target_stat_values[++index] = nesvnic->lro_mgr.stats.flushed; + target_stat_values[++index] = nesvnic->lro_mgr.stats.no_desc; + target_stat_values[++index] = atomic_read(&pau_qps_created); + target_stat_values[++index] = atomic_read(&pau_qps_destroyed); +} + +/** + * nes_netdev_get_drvinfo + */ +static void nes_netdev_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; + + strcpy(drvinfo->driver, DRV_NAME); + strcpy(drvinfo->bus_info, pci_name(nesvnic->nesdev->pcidev)); + sprintf(drvinfo->fw_version, "%u.%u", nesadapter->firmware_version>>16, + nesadapter->firmware_version & 0x000000ff); + strcpy(drvinfo->version, DRV_VERSION); + drvinfo->testinfo_len = 0; + drvinfo->eedump_len = 0; + drvinfo->regdump_len = 0; +} + + +/** + * nes_netdev_set_coalesce + */ +static int nes_netdev_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *et_coalesce) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; + unsigned long flags; + + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + if (et_coalesce->rx_max_coalesced_frames_low) { + shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low; + } + if (et_coalesce->rx_max_coalesced_frames_irq) { + shared_timer->threshold_target = et_coalesce->rx_max_coalesced_frames_irq; + } + if (et_coalesce->rx_max_coalesced_frames_high) { + shared_timer->threshold_high = et_coalesce->rx_max_coalesced_frames_high; + } + if (et_coalesce->rx_coalesce_usecs_low) { + shared_timer->timer_in_use_min = et_coalesce->rx_coalesce_usecs_low; + } + if (et_coalesce->rx_coalesce_usecs_high) { + shared_timer->timer_in_use_max = et_coalesce->rx_coalesce_usecs_high; + } + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); + + /* using this to drive total interrupt moderation */ + nesadapter->et_rx_coalesce_usecs_irq = et_coalesce->rx_coalesce_usecs_irq; + if (et_coalesce->use_adaptive_rx_coalesce) { + nesadapter->et_use_adaptive_rx_coalesce = 1; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC; + nesadapter->et_rx_coalesce_usecs_irq = 0; + if (et_coalesce->pkt_rate_low) { + nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low; + } + } else { + nesadapter->et_use_adaptive_rx_coalesce = 0; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT; + if (nesadapter->et_rx_coalesce_usecs_irq) { + nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, + 0x80000000 | ((u32)(nesadapter->et_rx_coalesce_usecs_irq*8))); + } + } + return 0; +} + + +/** + * nes_netdev_get_coalesce + */ +static int nes_netdev_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *et_coalesce) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct ethtool_coalesce temp_et_coalesce; + struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; + unsigned long flags; + + memset(&temp_et_coalesce, 0, sizeof(temp_et_coalesce)); + temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq; + temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce; + temp_et_coalesce.rate_sample_interval = nesadapter->et_rate_sample_interval; + temp_et_coalesce.pkt_rate_low = nesadapter->et_pkt_rate_low; + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + temp_et_coalesce.rx_max_coalesced_frames_low = shared_timer->threshold_low; + temp_et_coalesce.rx_max_coalesced_frames_irq = shared_timer->threshold_target; + temp_et_coalesce.rx_max_coalesced_frames_high = shared_timer->threshold_high; + temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min; + temp_et_coalesce.rx_coalesce_usecs_high = shared_timer->timer_in_use_max; + if (nesadapter->et_use_adaptive_rx_coalesce) { + temp_et_coalesce.rx_coalesce_usecs_irq = shared_timer->timer_in_use; + } + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); + memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce)); + return 0; +} + + +/** + * nes_netdev_get_pauseparam + */ +static void nes_netdev_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *et_pauseparam) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + + et_pauseparam->autoneg = 0; + et_pauseparam->rx_pause = (nesvnic->nesdev->disable_rx_flow_control == 0) ? 1:0; + et_pauseparam->tx_pause = (nesvnic->nesdev->disable_tx_flow_control == 0) ? 1:0; +} + + +/** + * nes_netdev_set_pauseparam + */ +static int nes_netdev_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *et_pauseparam) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + u32 u32temp; + + if (et_pauseparam->autoneg) { + /* TODO: should return unsupported */ + return 0; + } + if ((et_pauseparam->tx_pause == 1) && (nesdev->disable_tx_flow_control == 1)) { + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200)); + u32temp |= NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE; + nes_write_indexed(nesdev, + NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp); + nesdev->disable_tx_flow_control = 0; + } else if ((et_pauseparam->tx_pause == 0) && (nesdev->disable_tx_flow_control == 0)) { + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200)); + u32temp &= ~NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE; + nes_write_indexed(nesdev, + NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp); + nesdev->disable_tx_flow_control = 1; + } + if ((et_pauseparam->rx_pause == 1) && (nesdev->disable_rx_flow_control == 1)) { + u32temp = nes_read_indexed(nesdev, + NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40)); + u32temp &= ~NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE; + nes_write_indexed(nesdev, + NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40), u32temp); + nesdev->disable_rx_flow_control = 0; + } else if ((et_pauseparam->rx_pause == 0) && (nesdev->disable_rx_flow_control == 0)) { + u32temp = nes_read_indexed(nesdev, + NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40)); + u32temp |= NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE; + nes_write_indexed(nesdev, + NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40), u32temp); + nesdev->disable_rx_flow_control = 1; + } + + return 0; +} + + +/** + * nes_netdev_get_settings + */ +static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 mac_index = nesdev->mac_index; + u8 phy_type = nesadapter->phy_type[mac_index]; + u8 phy_index = nesadapter->phy_index[mac_index]; + u16 phy_data; + + et_cmd->duplex = DUPLEX_FULL; + et_cmd->port = PORT_MII; + et_cmd->maxtxpkt = 511; + et_cmd->maxrxpkt = 511; + + if (nesadapter->OneG_Mode) { + ethtool_cmd_speed_set(et_cmd, SPEED_1000); + if (phy_type == NES_PHY_TYPE_PUMA_1G) { + et_cmd->supported = SUPPORTED_1000baseT_Full; + et_cmd->advertising = ADVERTISED_1000baseT_Full; + et_cmd->autoneg = AUTONEG_DISABLE; + et_cmd->transceiver = XCVR_INTERNAL; + et_cmd->phy_address = mac_index; + } else { + unsigned long flags; + et_cmd->supported = SUPPORTED_1000baseT_Full + | SUPPORTED_Autoneg; + et_cmd->advertising = ADVERTISED_1000baseT_Full + | ADVERTISED_Autoneg; + spin_lock_irqsave(&nesadapter->phy_lock, flags); + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + if (phy_data & 0x1000) + et_cmd->autoneg = AUTONEG_ENABLE; + else + et_cmd->autoneg = AUTONEG_DISABLE; + et_cmd->transceiver = XCVR_EXTERNAL; + et_cmd->phy_address = phy_index; + } + return 0; + } + if ((phy_type == NES_PHY_TYPE_ARGUS) || + (phy_type == NES_PHY_TYPE_SFP_D) || + (phy_type == NES_PHY_TYPE_KR)) { + et_cmd->transceiver = XCVR_EXTERNAL; + et_cmd->port = PORT_FIBRE; + et_cmd->supported = SUPPORTED_FIBRE; + et_cmd->advertising = ADVERTISED_FIBRE; + et_cmd->phy_address = phy_index; + } else { + et_cmd->transceiver = XCVR_INTERNAL; + et_cmd->supported = SUPPORTED_10000baseT_Full; + et_cmd->advertising = ADVERTISED_10000baseT_Full; + et_cmd->phy_address = mac_index; + } + ethtool_cmd_speed_set(et_cmd, SPEED_10000); + et_cmd->autoneg = AUTONEG_DISABLE; + return 0; +} + + +/** + * nes_netdev_set_settings + */ +static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + + if ((nesadapter->OneG_Mode) && + (nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G)) { + unsigned long flags; + u16 phy_data; + u8 phy_index = nesadapter->phy_index[nesdev->mac_index]; + + spin_lock_irqsave(&nesadapter->phy_lock, flags); + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); + if (et_cmd->autoneg) { + /* Turn on Full duplex, Autoneg, and restart autonegotiation */ + phy_data |= 0x1300; + } else { + /* Turn off autoneg */ + phy_data &= ~0x1000; + } + nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data); + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + } + + return 0; +} + + +static const struct ethtool_ops nes_ethtool_ops = { + .get_link = ethtool_op_get_link, + .get_settings = nes_netdev_get_settings, + .set_settings = nes_netdev_set_settings, + .get_strings = nes_netdev_get_strings, + .get_sset_count = nes_netdev_get_sset_count, + .get_ethtool_stats = nes_netdev_get_ethtool_stats, + .get_drvinfo = nes_netdev_get_drvinfo, + .get_coalesce = nes_netdev_get_coalesce, + .set_coalesce = nes_netdev_set_coalesce, + .get_pauseparam = nes_netdev_get_pauseparam, + .set_pauseparam = nes_netdev_set_pauseparam, +}; + +static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, netdev_features_t features) +{ + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 u32temp; + unsigned long flags; + + spin_lock_irqsave(&nesadapter->phy_lock, flags); + + nes_debug(NES_DBG_NETDEV, "%s: %s\n", __func__, netdev->name); + + /* Enable/Disable VLAN Stripping */ + u32temp = nes_read_indexed(nesdev, NES_IDX_PCIX_DIAG); + if (features & NETIF_F_HW_VLAN_RX) + u32temp &= 0xfdffffff; + else + u32temp |= 0x02000000; + + nes_write_indexed(nesdev, NES_IDX_PCIX_DIAG, u32temp); + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); +} + +static netdev_features_t nes_fix_features(struct net_device *netdev, netdev_features_t features) +{ + /* + * Since there is no support for separate rx/tx vlan accel + * enable/disable make sure tx flag is always in same state as rx. + */ + if (features & NETIF_F_HW_VLAN_RX) + features |= NETIF_F_HW_VLAN_TX; + else + features &= ~NETIF_F_HW_VLAN_TX; + + return features; +} + +static int nes_set_features(struct net_device *netdev, netdev_features_t features) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + u32 changed = netdev->features ^ features; + + if (changed & NETIF_F_HW_VLAN_RX) + nes_vlan_mode(netdev, nesdev, features); + + return 0; +} + +static const struct net_device_ops nes_netdev_ops = { + .ndo_open = nes_netdev_open, + .ndo_stop = nes_netdev_stop, + .ndo_start_xmit = nes_netdev_start_xmit, + .ndo_get_stats = nes_netdev_get_stats, + .ndo_tx_timeout = nes_netdev_tx_timeout, + .ndo_set_mac_address = nes_netdev_set_mac_address, + .ndo_set_rx_mode = nes_netdev_set_multicast_list, + .ndo_change_mtu = nes_netdev_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_fix_features = nes_fix_features, + .ndo_set_features = nes_set_features, +}; + +/** + * nes_netdev_init - initialize network device + */ +struct net_device *nes_netdev_init(struct nes_device *nesdev, + void __iomem *mmio_addr) +{ + u64 u64temp; + struct nes_vnic *nesvnic; + struct net_device *netdev; + struct nic_qp_map *curr_qp_map; + u8 phy_type = nesdev->nesadapter->phy_type[nesdev->mac_index]; + + netdev = alloc_etherdev(sizeof(struct nes_vnic)); + if (!netdev) { + printk(KERN_ERR PFX "nesvnic etherdev alloc failed"); + return NULL; + } + nesvnic = netdev_priv(netdev); + + nes_debug(NES_DBG_INIT, "netdev = %p, %s\n", netdev, netdev->name); + + SET_NETDEV_DEV(netdev, &nesdev->pcidev->dev); + + netdev->watchdog_timeo = NES_TX_TIMEOUT; + netdev->irq = nesdev->pcidev->irq; + netdev->mtu = ETH_DATA_LEN; + netdev->hard_header_len = ETH_HLEN; + netdev->addr_len = ETH_ALEN; + netdev->type = ARPHRD_ETHER; + netdev->features = NETIF_F_HIGHDMA; + netdev->netdev_ops = &nes_netdev_ops; + netdev->ethtool_ops = &nes_ethtool_ops; + netif_napi_add(netdev, &nesvnic->napi, nes_netdev_poll, 128); + nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n"); + netdev->features |= NETIF_F_HW_VLAN_TX; + + /* Fill in the port structure */ + nesvnic->netdev = netdev; + nesvnic->nesdev = nesdev; + nesvnic->msg_enable = netif_msg_init(debug, default_msg); + nesvnic->netdev_index = nesdev->netdev_count; + nesvnic->perfect_filter_index = nesdev->nesadapter->netdev_count; + nesvnic->max_frame_size = netdev->mtu + netdev->hard_header_len + VLAN_HLEN; + + curr_qp_map = nic_qp_mapping_per_function[PCI_FUNC(nesdev->pcidev->devfn)]; + nesvnic->nic.qp_id = curr_qp_map[nesdev->netdev_count].qpid; + nesvnic->nic_index = curr_qp_map[nesdev->netdev_count].nic_index; + nesvnic->logical_port = curr_qp_map[nesdev->netdev_count].logical_port; + + /* Setup the burned in MAC address */ + u64temp = (u64)nesdev->nesadapter->mac_addr_low; + u64temp += ((u64)nesdev->nesadapter->mac_addr_high) << 32; + u64temp += nesvnic->nic_index; + netdev->dev_addr[0] = (u8)(u64temp>>40); + netdev->dev_addr[1] = (u8)(u64temp>>32); + netdev->dev_addr[2] = (u8)(u64temp>>24); + netdev->dev_addr[3] = (u8)(u64temp>>16); + netdev->dev_addr[4] = (u8)(u64temp>>8); + netdev->dev_addr[5] = (u8)u64temp; + memcpy(netdev->perm_addr, netdev->dev_addr, 6); + + netdev->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM | + NETIF_F_HW_VLAN_RX; + if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV)) + netdev->hw_features |= NETIF_F_TSO; + netdev->features |= netdev->hw_features; + netdev->hw_features |= NETIF_F_LRO; + + nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d," + " nic_index = %d, logical_port = %d, mac_index = %d.\n", + nesvnic, (unsigned long)netdev->features, nesvnic->nic.qp_id, + nesvnic->nic_index, nesvnic->logical_port, nesdev->mac_index); + + if (nesvnic->nesdev->nesadapter->port_count == 1 && + nesvnic->nesdev->nesadapter->adapter_fcn_count == 1) { + + nesvnic->qp_nic_index[0] = nesvnic->nic_index; + nesvnic->qp_nic_index[1] = nesvnic->nic_index + 1; + if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) { + nesvnic->qp_nic_index[2] = 0xf; + nesvnic->qp_nic_index[3] = 0xf; + } else { + nesvnic->qp_nic_index[2] = nesvnic->nic_index + 2; + nesvnic->qp_nic_index[3] = nesvnic->nic_index + 3; + } + } else { + if (nesvnic->nesdev->nesadapter->port_count == 2 || + (nesvnic->nesdev->nesadapter->port_count == 1 && + nesvnic->nesdev->nesadapter->adapter_fcn_count == 2)) { + nesvnic->qp_nic_index[0] = nesvnic->nic_index; + nesvnic->qp_nic_index[1] = nesvnic->nic_index + + 2; + nesvnic->qp_nic_index[2] = 0xf; + nesvnic->qp_nic_index[3] = 0xf; + } else { + nesvnic->qp_nic_index[0] = nesvnic->nic_index; + nesvnic->qp_nic_index[1] = 0xf; + nesvnic->qp_nic_index[2] = 0xf; + nesvnic->qp_nic_index[3] = 0xf; + } + } + nesvnic->next_qp_nic_index = 0; + + if (nesdev->netdev_count == 0) { + nesvnic->rdma_enabled = 1; + } else { + nesvnic->rdma_enabled = 0; + } + nesvnic->nic_cq.cq_number = nesvnic->nic.qp_id; + init_timer(&nesvnic->event_timer); + nesvnic->event_timer.function = NULL; + spin_lock_init(&nesvnic->tx_lock); + spin_lock_init(&nesvnic->port_ibevent_lock); + nesdev->netdev[nesdev->netdev_count] = netdev; + + nes_debug(NES_DBG_INIT, "Adding nesvnic (%p) to the adapters nesvnic_list for MAC%d.\n", + nesvnic, nesdev->mac_index); + list_add_tail(&nesvnic->list, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]); + + if ((nesdev->netdev_count == 0) && + ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) || + ((phy_type == NES_PHY_TYPE_PUMA_1G) && + (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) || + ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) { + u32 u32temp; + u32 link_mask = 0; + u32 link_val = 0; + u16 temp_phy_data; + u16 phy_data = 0; + unsigned long flags; + + u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + + (0x200 * (nesdev->mac_index & 1))); + if (phy_type != NES_PHY_TYPE_PUMA_1G) { + u32temp |= 0x00200000; + nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + + (0x200 * (nesdev->mac_index & 1)), u32temp); + } + + /* Check and set linkup here. This is for back to back */ + /* configuration where second port won't get link interrupt */ + switch (phy_type) { + case NES_PHY_TYPE_PUMA_1G: + if (nesdev->mac_index < 2) { + link_mask = 0x01010000; + link_val = 0x01010000; + } else { + link_mask = 0x02020000; + link_val = 0x02020000; + } + break; + case NES_PHY_TYPE_SFP_D: + spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); + nes_read_10G_phy_reg(nesdev, + nesdev->nesadapter->phy_index[nesdev->mac_index], + 1, 0x9003); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + nes_read_10G_phy_reg(nesdev, + nesdev->nesadapter->phy_index[nesdev->mac_index], + 3, 0x0021); + nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + nes_read_10G_phy_reg(nesdev, + nesdev->nesadapter->phy_index[nesdev->mac_index], + 3, 0x0021); + phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); + phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0; + break; + default: + link_mask = 0x0f1f0000; + link_val = 0x0f0f0000; + break; + } + + u32temp = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + + (0x200 * (nesdev->mac_index & 1))); + + if (phy_type == NES_PHY_TYPE_SFP_D) { + if (phy_data & 0x0004) + nesvnic->linkup = 1; + } else { + if ((u32temp & link_mask) == link_val) + nesvnic->linkup = 1; + } + + /* clear the MAC interrupt status, assumes direct logical to physical mapping */ + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index)); + nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp); + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp); + + nes_init_phy(nesdev); + } + + nes_vlan_mode(netdev, nesdev, netdev->features); + + return netdev; +} + + +/** + * nes_netdev_destroy - destroy network device structure + */ +void nes_netdev_destroy(struct net_device *netdev) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + + /* make sure 'stop' method is called by Linux stack */ + /* nes_netdev_stop(netdev); */ + + list_del(&nesvnic->list); + + if (nesvnic->of_device_registered) { + nes_destroy_ofa_device(nesvnic->nesibdev); + } + + free_netdev(netdev); +} + + +/** + * nes_nic_cm_xmit -- CM calls this to send out pkts + */ +int nes_nic_cm_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + int ret; + + skb->dev = netdev; + ret = dev_queue_xmit(skb); + if (ret) { + nes_debug(NES_DBG_CM, "Bad return code from dev_queue_xmit %d\n", ret); + } + + return ret; +} diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h new file mode 100644 index 00000000..4926de74 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_user.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef NES_USER_H +#define NES_USER_H + +#include + +#define NES_ABI_USERSPACE_VER 1 +#define NES_ABI_KERNEL_VER 1 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct nes_alloc_ucontext_req { + __u32 reserved32; + __u8 userspace_ver; + __u8 reserved8[3]; +}; + +struct nes_alloc_ucontext_resp { + __u32 max_pds; /* maximum pds allowed for this user process */ + __u32 max_qps; /* maximum qps allowed for this user process */ + __u32 wq_size; /* size of the WQs (sq+rq) allocated to the mmaped area */ + __u8 virtwq; /* flag to indicate if virtual WQ are to be used or not */ + __u8 kernel_ver; + __u8 reserved[2]; +}; + +struct nes_alloc_pd_resp { + __u32 pd_id; + __u32 mmap_db_index; +}; + +struct nes_create_cq_req { + __u64 user_cq_buffer; + __u32 mcrqf; + __u8 reserved[4]; +}; + +struct nes_create_qp_req { + __u64 user_wqe_buffers; +}; + +enum iwnes_memreg_type { + IWNES_MEMREG_TYPE_MEM = 0x0000, + IWNES_MEMREG_TYPE_QP = 0x0001, + IWNES_MEMREG_TYPE_CQ = 0x0002, + IWNES_MEMREG_TYPE_MW = 0x0003, + IWNES_MEMREG_TYPE_FMR = 0x0004, + IWNES_MEMREG_TYPE_FMEM = 0x0005, +}; + +struct nes_mem_reg_req { + __u32 reg_type; /* indicates if id is memory, QP or CQ */ + __u32 reserved; +}; + +struct nes_create_cq_resp { + __u32 cq_id; + __u32 cq_size; + __u32 mmap_db_index; + __u32 reserved; +}; + +struct nes_create_qp_resp { + __u32 qp_id; + __u32 actual_sq_size; + __u32 actual_rq_size; + __u32 mmap_sq_db_index; + __u32 mmap_rq_db_index; + __u32 nes_drv_opt; +}; + +#endif /* NES_USER_H */ diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c new file mode 100644 index 00000000..e98f4fc0 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -0,0 +1,972 @@ +/* + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "nes.h" + +static u16 nes_read16_eeprom(void __iomem *addr, u16 offset); + +u32 mh_detected; +u32 mh_pauses_sent; + +static u32 nes_set_pau(struct nes_device *nesdev) +{ + u32 ret = 0; + u32 counter; + + nes_write_indexed(nesdev, NES_IDX_GPR2, NES_ENABLE_PAU); + nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1); + + for (counter = 0; counter < NES_PAU_COUNTER; counter++) { + udelay(30); + if (!nes_read_indexed(nesdev, NES_IDX_GPR2)) { + printk(KERN_INFO PFX "PAU is supported.\n"); + break; + } + nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1); + } + if (counter == NES_PAU_COUNTER) { + printk(KERN_INFO PFX "PAU is not supported.\n"); + return -EPERM; + } + return ret; +} + +/** + * nes_read_eeprom_values - + */ +int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesadapter) +{ + u32 mac_addr_low; + u16 mac_addr_high; + u16 eeprom_data; + u16 eeprom_offset; + u16 next_section_address; + u16 sw_section_ver; + u8 major_ver = 0; + u8 minor_ver = 0; + + /* TODO: deal with EEPROM endian issues */ + if (nesadapter->firmware_eeprom_offset == 0) { + /* Read the EEPROM Parameters */ + eeprom_data = nes_read16_eeprom(nesdev->regs, 0); + nes_debug(NES_DBG_HW, "EEPROM Offset 0 = 0x%04X\n", eeprom_data); + eeprom_offset = 2 + (((eeprom_data & 0x007f) << 3) << + ((eeprom_data & 0x0080) >> 7)); + nes_debug(NES_DBG_HW, "Firmware Offset = 0x%04X\n", eeprom_offset); + nesadapter->firmware_eeprom_offset = eeprom_offset; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 4); + if (eeprom_data != 0x5746) { + nes_debug(NES_DBG_HW, "Not a valid Firmware Image = 0x%04X\n", eeprom_data); + return -1; + } + + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); + nes_debug(NES_DBG_HW, "EEPROM Offset %u = 0x%04X\n", + eeprom_offset + 2, eeprom_data); + eeprom_offset += ((eeprom_data & 0x00ff) << 3) << ((eeprom_data & 0x0100) >> 8); + nes_debug(NES_DBG_HW, "Software Offset = 0x%04X\n", eeprom_offset); + nesadapter->software_eeprom_offset = eeprom_offset; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 4); + if (eeprom_data != 0x5753) { + printk("Not a valid Software Image = 0x%04X\n", eeprom_data); + return -1; + } + sw_section_ver = nes_read16_eeprom(nesdev->regs, nesadapter->software_eeprom_offset + 6); + nes_debug(NES_DBG_HW, "Software section version number = 0x%04X\n", + sw_section_ver); + + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); + nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", + eeprom_offset + 2, eeprom_data); + next_section_address = eeprom_offset + (((eeprom_data & 0x00ff) << 3) << + ((eeprom_data & 0x0100) >> 8)); + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); + if (eeprom_data != 0x414d) { + nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x414d but was 0x%04X\n", + eeprom_data); + goto no_fw_rev; + } + eeprom_offset = next_section_address; + + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); + nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", + eeprom_offset + 2, eeprom_data); + next_section_address = eeprom_offset + (((eeprom_data & 0x00ff) << 3) << + ((eeprom_data & 0x0100) >> 8)); + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); + if (eeprom_data != 0x4f52) { + nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x4f52 but was 0x%04X\n", + eeprom_data); + goto no_fw_rev; + } + eeprom_offset = next_section_address; + + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); + nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", + eeprom_offset + 2, eeprom_data); + next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3); + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); + if (eeprom_data != 0x5746) { + nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x5746 but was 0x%04X\n", + eeprom_data); + goto no_fw_rev; + } + eeprom_offset = next_section_address; + + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); + nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", + eeprom_offset + 2, eeprom_data); + next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3); + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); + if (eeprom_data != 0x5753) { + nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x5753 but was 0x%04X\n", + eeprom_data); + goto no_fw_rev; + } + eeprom_offset = next_section_address; + + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); + nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", + eeprom_offset + 2, eeprom_data); + next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3); + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); + if (eeprom_data != 0x414d) { + nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x414d but was 0x%04X\n", + eeprom_data); + goto no_fw_rev; + } + eeprom_offset = next_section_address; + + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); + nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", + eeprom_offset + 2, eeprom_data); + next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3); + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); + if (eeprom_data != 0x464e) { + nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x464e but was 0x%04X\n", + eeprom_data); + goto no_fw_rev; + } + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 8); + printk(PFX "Firmware version %u.%u\n", (u8)(eeprom_data>>8), (u8)eeprom_data); + major_ver = (u8)(eeprom_data >> 8); + minor_ver = (u8)(eeprom_data); + + if (nes_drv_opt & NES_DRV_OPT_DISABLE_VIRT_WQ) { + nes_debug(NES_DBG_HW, "Virtual WQs have been disabled\n"); + } else if (((major_ver == 2) && (minor_ver > 21)) || ((major_ver > 2) && (major_ver != 255))) { + nesadapter->virtwq = 1; + } + if (((major_ver == 3) && (minor_ver >= 16)) || (major_ver > 3)) + nesadapter->send_term_ok = 1; + + if (nes_drv_opt & NES_DRV_OPT_ENABLE_PAU) { + if (!nes_set_pau(nesdev)) + nesadapter->allow_unaligned_fpdus = 1; + } + + nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) + + (u32)((u8)eeprom_data); + + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 10); + printk(PFX "EEPROM version %u.%u\n", (u8)(eeprom_data>>8), (u8)eeprom_data); + nesadapter->eeprom_version = (((u32)(u8)(eeprom_data>>8)) << 16) + + (u32)((u8)eeprom_data); + +no_fw_rev: + /* eeprom is valid */ + eeprom_offset = nesadapter->software_eeprom_offset; + eeprom_offset += 8; + nesadapter->netdev_max = (u8)nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + mac_addr_high = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + mac_addr_low = (u32)nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + mac_addr_low <<= 16; + mac_addr_low += (u32)nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "Base MAC Address = 0x%04X%08X\n", + mac_addr_high, mac_addr_low); + nes_debug(NES_DBG_HW, "MAC Address count = %u\n", nesadapter->netdev_max); + + nesadapter->mac_addr_low = mac_addr_low; + nesadapter->mac_addr_high = mac_addr_high; + + /* Read the Phy Type array */ + eeprom_offset += 10; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->phy_type[0] = (u8)(eeprom_data >> 8); + nesadapter->phy_type[1] = (u8)eeprom_data; + + /* Read the port array */ + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->phy_type[2] = (u8)(eeprom_data >> 8); + nesadapter->phy_type[3] = (u8)eeprom_data; + /* port_count is set by soft reset reg */ + nes_debug(NES_DBG_HW, "port_count = %u, port 0 -> %u, port 1 -> %u," + " port 2 -> %u, port 3 -> %u\n", + nesadapter->port_count, + nesadapter->phy_type[0], nesadapter->phy_type[1], + nesadapter->phy_type[2], nesadapter->phy_type[3]); + + /* Read PD config array */ + eeprom_offset += 10; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_size[0] = eeprom_data; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_base[0] = eeprom_data; + nes_debug(NES_DBG_HW, "PD0 config, size=0x%04x, base=0x%04x\n", + nesadapter->pd_config_size[0], nesadapter->pd_config_base[0]); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_size[1] = eeprom_data; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_base[1] = eeprom_data; + nes_debug(NES_DBG_HW, "PD1 config, size=0x%04x, base=0x%04x\n", + nesadapter->pd_config_size[1], nesadapter->pd_config_base[1]); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_size[2] = eeprom_data; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_base[2] = eeprom_data; + nes_debug(NES_DBG_HW, "PD2 config, size=0x%04x, base=0x%04x\n", + nesadapter->pd_config_size[2], nesadapter->pd_config_base[2]); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_size[3] = eeprom_data; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_base[3] = eeprom_data; + nes_debug(NES_DBG_HW, "PD3 config, size=0x%04x, base=0x%04x\n", + nesadapter->pd_config_size[3], nesadapter->pd_config_base[3]); + + /* Read Rx Pool Size */ + eeprom_offset += 22; /* 46 */ + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->rx_pool_size = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "rx_pool_size = 0x%08X\n", nesadapter->rx_pool_size); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->tx_pool_size = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "tx_pool_size = 0x%08X\n", nesadapter->tx_pool_size); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->rx_threshold = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "rx_threshold = 0x%08X\n", nesadapter->rx_threshold); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->tcp_timer_core_clk_divisor = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "tcp_timer_core_clk_divisor = 0x%08X\n", + nesadapter->tcp_timer_core_clk_divisor); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->iwarp_config = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "iwarp_config = 0x%08X\n", nesadapter->iwarp_config); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->cm_config = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "cm_config = 0x%08X\n", nesadapter->cm_config); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->sws_timer_config = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "sws_timer_config = 0x%08X\n", nesadapter->sws_timer_config); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->tcp_config1 = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "tcp_config1 = 0x%08X\n", nesadapter->tcp_config1); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->wqm_wat = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "wqm_wat = 0x%08X\n", nesadapter->wqm_wat); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + eeprom_offset += 2; + nesadapter->core_clock = (((u32)eeprom_data) << 16) + + nes_read16_eeprom(nesdev->regs, eeprom_offset); + nes_debug(NES_DBG_HW, "core_clock = 0x%08X\n", nesadapter->core_clock); + + if ((sw_section_ver) && (nesadapter->hw_rev != NE020_REV)) { + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->phy_index[0] = (eeprom_data & 0xff00)>>8; + nesadapter->phy_index[1] = eeprom_data & 0x00ff; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->phy_index[2] = (eeprom_data & 0xff00)>>8; + nesadapter->phy_index[3] = eeprom_data & 0x00ff; + } else { + nesadapter->phy_index[0] = 4; + nesadapter->phy_index[1] = 5; + nesadapter->phy_index[2] = 6; + nesadapter->phy_index[3] = 7; + } + nes_debug(NES_DBG_HW, "Phy address map = 0 > %u, 1 > %u, 2 > %u, 3 > %u\n", + nesadapter->phy_index[0],nesadapter->phy_index[1], + nesadapter->phy_index[2],nesadapter->phy_index[3]); + } + + return 0; +} + + +/** + * nes_read16_eeprom + */ +static u16 nes_read16_eeprom(void __iomem *addr, u16 offset) +{ + writel(NES_EEPROM_READ_REQUEST + (offset >> 1), + (void __iomem *)addr + NES_EEPROM_COMMAND); + + do { + } while (readl((void __iomem *)addr + NES_EEPROM_COMMAND) & + NES_EEPROM_READ_REQUEST); + + return readw((void __iomem *)addr + NES_EEPROM_DATA); +} + + +/** + * nes_write_1G_phy_reg + */ +void nes_write_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 data) +{ + u32 u32temp; + u32 counter; + + nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, + 0x50020000 | data | ((u32)phy_reg << 18) | ((u32)phy_addr << 23)); + for (counter = 0; counter < 100 ; counter++) { + udelay(30); + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); + if (u32temp & 1) { + /* nes_debug(NES_DBG_PHY, "Phy interrupt status = 0x%X.\n", u32temp); */ + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); + break; + } + } + if (!(u32temp & 1)) + nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", + u32temp); +} + + +/** + * nes_read_1G_phy_reg + * This routine only issues the read, the data must be read + * separately. + */ +void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 *data) +{ + u32 u32temp; + u32 counter; + + /* nes_debug(NES_DBG_PHY, "phy addr = %d, mac_index = %d\n", + phy_addr, nesdev->mac_index); */ + + nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, + 0x60020000 | ((u32)phy_reg << 18) | ((u32)phy_addr << 23)); + for (counter = 0; counter < 100 ; counter++) { + udelay(30); + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); + if (u32temp & 1) { + /* nes_debug(NES_DBG_PHY, "Phy interrupt status = 0x%X.\n", u32temp); */ + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); + break; + } + } + if (!(u32temp & 1)) { + nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", + u32temp); + *data = 0xffff; + } else { + *data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + } +} + + +/** + * nes_write_10G_phy_reg + */ +void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_addr, u8 dev_addr, u16 phy_reg, + u16 data) +{ + u32 port_addr; + u32 u32temp; + u32 counter; + + port_addr = phy_addr; + + /* set address */ + nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, + 0x00020000 | (u32)phy_reg | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); + for (counter = 0; counter < 100 ; counter++) { + udelay(30); + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); + if (u32temp & 1) { + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); + break; + } + } + if (!(u32temp & 1)) + nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", + u32temp); + + /* set data */ + nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, + 0x10020000 | (u32)data | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); + for (counter = 0; counter < 100 ; counter++) { + udelay(30); + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); + if (u32temp & 1) { + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); + break; + } + } + if (!(u32temp & 1)) + nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", + u32temp); +} + + +/** + * nes_read_10G_phy_reg + * This routine only issues the read, the data must be read + * separately. + */ +void nes_read_10G_phy_reg(struct nes_device *nesdev, u8 phy_addr, u8 dev_addr, u16 phy_reg) +{ + u32 port_addr; + u32 u32temp; + u32 counter; + + port_addr = phy_addr; + + /* set address */ + nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, + 0x00020000 | (u32)phy_reg | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); + for (counter = 0; counter < 100 ; counter++) { + udelay(30); + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); + if (u32temp & 1) { + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); + break; + } + } + if (!(u32temp & 1)) + nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", + u32temp); + + /* issue read */ + nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, + 0x30020000 | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); + for (counter = 0; counter < 100 ; counter++) { + udelay(30); + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); + if (u32temp & 1) { + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); + break; + } + } + if (!(u32temp & 1)) + nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", + u32temp); +} + + +/** + * nes_get_cqp_request + */ +struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev) +{ + unsigned long flags; + struct nes_cqp_request *cqp_request = NULL; + + if (!list_empty(&nesdev->cqp_avail_reqs)) { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + if (!list_empty(&nesdev->cqp_avail_reqs)) { + cqp_request = list_entry(nesdev->cqp_avail_reqs.next, + struct nes_cqp_request, list); + list_del_init(&cqp_request->list); + } + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } + if (cqp_request == NULL) { + cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_ATOMIC); + if (cqp_request) { + cqp_request->dynamic = 1; + INIT_LIST_HEAD(&cqp_request->list); + } + } + + if (cqp_request) { + init_waitqueue_head(&cqp_request->waitq); + cqp_request->waiting = 0; + cqp_request->request_done = 0; + cqp_request->callback = 0; + init_waitqueue_head(&cqp_request->waitq); + nes_debug(NES_DBG_CQP, "Got cqp request %p from the available list \n", + cqp_request); + } else + printk(KERN_ERR PFX "%s: Could not allocated a CQP request.\n", + __func__); + + return cqp_request; +} + +void nes_free_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request) +{ + unsigned long flags; + + nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", + cqp_request, + le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f); + + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } +} + +void nes_put_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request) +{ + if (atomic_dec_and_test(&cqp_request->refcount)) + nes_free_cqp_request(nesdev, cqp_request); +} + + +/** + * nes_post_cqp_request + */ +void nes_post_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request) +{ + struct nes_hw_cqp_wqe *cqp_wqe; + unsigned long flags; + u32 cqp_head; + u64 u64temp; + u32 opcode; + int ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX; + + spin_lock_irqsave(&nesdev->cqp.lock, flags); + + if (((((nesdev->cqp.sq_tail+(nesdev->cqp.sq_size*2))-nesdev->cqp.sq_head) & + (nesdev->cqp.sq_size - 1)) != 1) + && (list_empty(&nesdev->cqp_pending_reqs))) { + cqp_head = nesdev->cqp.sq_head++; + nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe)); + opcode = le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]); + if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT) + ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX; + barrier(); + u64temp = (unsigned long)cqp_request; + set_wqe_64bit_value(cqp_wqe->wqe_words, ctx_index, u64temp); + nes_debug(NES_DBG_CQP, "CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ," + " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u," + " waiting = %d, refcount = %d.\n", + opcode & NES_CQP_OPCODE_MASK, + le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request, + nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size, + cqp_request->waiting, atomic_read(&cqp_request->refcount)); + + barrier(); + + /* Ring doorbell (1 WQEs) */ + nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id); + + barrier(); + } else { + nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X), line 1 = 0x%08X" + " put on the pending queue.\n", + cqp_request, + le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, + le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_ID_IDX])); + list_add_tail(&cqp_request->list, &nesdev->cqp_pending_reqs); + } + + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + + return; +} + +/** + * nes_arp_table + */ +int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 action) +{ + struct nes_adapter *nesadapter = nesdev->nesadapter; + int arp_index; + int err = 0; + __be32 tmp_addr; + + for (arp_index = 0; (u32) arp_index < nesadapter->arp_table_size; arp_index++) { + if (nesadapter->arp_table[arp_index].ip_addr == ip_addr) + break; + } + + if (action == NES_ARP_ADD) { + if (arp_index != nesadapter->arp_table_size) { + return -1; + } + + arp_index = 0; + err = nes_alloc_resource(nesadapter, nesadapter->allocated_arps, + nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index); + if (err) { + nes_debug(NES_DBG_NETDEV, "nes_alloc_resource returned error = %u\n", err); + return err; + } + nes_debug(NES_DBG_NETDEV, "ADD, arp_index=%d\n", arp_index); + + nesadapter->arp_table[arp_index].ip_addr = ip_addr; + memcpy(nesadapter->arp_table[arp_index].mac_addr, mac_addr, ETH_ALEN); + return arp_index; + } + + /* DELETE or RESOLVE */ + if (arp_index == nesadapter->arp_table_size) { + tmp_addr = cpu_to_be32(ip_addr); + nes_debug(NES_DBG_NETDEV, "MAC for %pI4 not in ARP table - cannot %s\n", + &tmp_addr, action == NES_ARP_RESOLVE ? "resolve" : "delete"); + return -1; + } + + if (action == NES_ARP_RESOLVE) { + nes_debug(NES_DBG_NETDEV, "RESOLVE, arp_index=%d\n", arp_index); + return arp_index; + } + + if (action == NES_ARP_DELETE) { + nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index); + nesadapter->arp_table[arp_index].ip_addr = 0; + memset(nesadapter->arp_table[arp_index].mac_addr, 0x00, ETH_ALEN); + nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index); + return arp_index; + } + + return -1; +} + + +/** + * nes_mh_fix + */ +void nes_mh_fix(unsigned long parm) +{ + unsigned long flags; + struct nes_device *nesdev = (struct nes_device *)parm; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_vnic *nesvnic; + u32 used_chunks_tx; + u32 temp_used_chunks_tx; + u32 temp_last_used_chunks_tx; + u32 used_chunks_mask; + u32 mac_tx_frames_low; + u32 mac_tx_frames_high; + u32 mac_tx_pauses; + u32 serdes_status; + u32 reset_value; + u32 tx_control; + u32 tx_config; + u32 tx_pause_quanta; + u32 rx_control; + u32 rx_config; + u32 mac_exact_match; + u32 mpp_debug; + u32 i=0; + u32 chunks_tx_progress = 0; + + spin_lock_irqsave(&nesadapter->phy_lock, flags); + if ((nesadapter->mac_sw_state[0] != NES_MAC_SW_IDLE) || (nesadapter->mac_link_down[0])) { + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + goto no_mh_work; + } + nesadapter->mac_sw_state[0] = NES_MAC_SW_MH; + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + do { + mac_tx_frames_low = nes_read_indexed(nesdev, NES_IDX_MAC_TX_FRAMES_LOW); + mac_tx_frames_high = nes_read_indexed(nesdev, NES_IDX_MAC_TX_FRAMES_HIGH); + mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES); + used_chunks_tx = nes_read_indexed(nesdev, NES_IDX_USED_CHUNKS_TX); + nesdev->mac_pause_frames_sent += mac_tx_pauses; + used_chunks_mask = 0; + temp_used_chunks_tx = used_chunks_tx; + temp_last_used_chunks_tx = nesdev->last_used_chunks_tx; + + if (nesdev->netdev[0]) { + nesvnic = netdev_priv(nesdev->netdev[0]); + } else { + break; + } + + for (i=0; i<4; i++) { + used_chunks_mask <<= 8; + if (nesvnic->qp_nic_index[i] != 0xff) { + used_chunks_mask |= 0xff; + if ((temp_used_chunks_tx&0xff)<(temp_last_used_chunks_tx&0xff)) { + chunks_tx_progress = 1; + } + } + temp_used_chunks_tx >>= 8; + temp_last_used_chunks_tx >>= 8; + } + if ((mac_tx_frames_low) || (mac_tx_frames_high) || + (!(used_chunks_tx&used_chunks_mask)) || + (!(nesdev->last_used_chunks_tx&used_chunks_mask)) || + (chunks_tx_progress) ) { + nesdev->last_used_chunks_tx = used_chunks_tx; + break; + } + nesdev->last_used_chunks_tx = used_chunks_tx; + barrier(); + + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, 0x00000005); + mh_pauses_sent++; + mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES); + if (mac_tx_pauses) { + nesdev->mac_pause_frames_sent += mac_tx_pauses; + break; + } + + tx_control = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONTROL); + tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); + tx_pause_quanta = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_QUANTA); + rx_control = nes_read_indexed(nesdev, NES_IDX_MAC_RX_CONTROL); + rx_config = nes_read_indexed(nesdev, NES_IDX_MAC_RX_CONFIG); + mac_exact_match = nes_read_indexed(nesdev, NES_IDX_MAC_EXACT_MATCH_BOTTOM); + mpp_debug = nes_read_indexed(nesdev, NES_IDX_MPP_DEBUG); + + /* one last ditch effort to avoid a false positive */ + mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES); + if (mac_tx_pauses) { + nesdev->last_mac_tx_pauses = nesdev->mac_pause_frames_sent; + nes_debug(NES_DBG_HW, "failsafe caught slow outbound pause\n"); + break; + } + mh_detected++; + + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, 0x00000000); + reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); + + nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value | 0x0000001d); + + while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) + & 0x00000040) != 0x00000040) && (i++ < 5000)) { + /* mdelay(1); */ + } + + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008); + serdes_status = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0); + + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE0, 0x0ff00000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET0, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS0, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0, 0x00000000); + if (nesadapter->OneG_Mode) { + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0182222); + } else { + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0042222); + } + serdes_status = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_STATUS0); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff); + + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, tx_control); + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); + nes_write_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_QUANTA, tx_pause_quanta); + nes_write_indexed(nesdev, NES_IDX_MAC_RX_CONTROL, rx_control); + nes_write_indexed(nesdev, NES_IDX_MAC_RX_CONFIG, rx_config); + nes_write_indexed(nesdev, NES_IDX_MAC_EXACT_MATCH_BOTTOM, mac_exact_match); + nes_write_indexed(nesdev, NES_IDX_MPP_DEBUG, mpp_debug); + + } while (0); + + nesadapter->mac_sw_state[0] = NES_MAC_SW_IDLE; +no_mh_work: + nesdev->nesadapter->mh_timer.expires = jiffies + (HZ/5); + add_timer(&nesdev->nesadapter->mh_timer); +} + +/** + * nes_clc + */ +void nes_clc(unsigned long parm) +{ + unsigned long flags; + struct nes_device *nesdev = (struct nes_device *)parm; + struct nes_adapter *nesadapter = nesdev->nesadapter; + + spin_lock_irqsave(&nesadapter->phy_lock, flags); + nesadapter->link_interrupt_count[0] = 0; + nesadapter->link_interrupt_count[1] = 0; + nesadapter->link_interrupt_count[2] = 0; + nesadapter->link_interrupt_count[3] = 0; + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + + nesadapter->lc_timer.expires = jiffies + 3600 * HZ; /* 1 hour */ + add_timer(&nesadapter->lc_timer); +} + + +/** + * nes_dump_mem + */ +void nes_dump_mem(unsigned int dump_debug_level, void *addr, int length) +{ + char xlate[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f'}; + char *ptr; + char hex_buf[80]; + char ascii_buf[20]; + int num_char; + int num_ascii; + int num_hex; + + if (!(nes_debug_level & dump_debug_level)) { + return; + } + + ptr = addr; + if (length > 0x100) { + nes_debug(dump_debug_level, "Length truncated from %x to %x\n", length, 0x100); + length = 0x100; + } + nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", ptr, length, length); + + memset(ascii_buf, 0, 20); + memset(hex_buf, 0, 80); + + num_ascii = 0; + num_hex = 0; + for (num_char = 0; num_char < length; num_char++) { + if (num_ascii == 8) { + ascii_buf[num_ascii++] = ' '; + hex_buf[num_hex++] = '-'; + hex_buf[num_hex++] = ' '; + } + + if (*ptr < 0x20 || *ptr > 0x7e) + ascii_buf[num_ascii++] = '.'; + else + ascii_buf[num_ascii++] = *ptr; + hex_buf[num_hex++] = xlate[((*ptr & 0xf0) >> 4)]; + hex_buf[num_hex++] = xlate[*ptr & 0x0f]; + hex_buf[num_hex++] = ' '; + ptr++; + + if (num_ascii >= 17) { + /* output line and reset */ + nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf); + memset(ascii_buf, 0, 20); + memset(hex_buf, 0, 80); + num_ascii = 0; + num_hex = 0; + } + } + + /* output the rest */ + if (num_ascii) { + while (num_ascii < 17) { + if (num_ascii == 8) { + hex_buf[num_hex++] = ' '; + hex_buf[num_hex++] = ' '; + } + hex_buf[num_hex++] = ' '; + hex_buf[num_hex++] = ' '; + hex_buf[num_hex++] = ' '; + num_ascii++; + } + + nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf); + } +} diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c new file mode 100644 index 00000000..8b8812de --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -0,0 +1,4051 @@ +/* + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "nes.h" + +#include + +atomic_t mod_qp_timouts; +atomic_t qps_created; +atomic_t sw_qps_destroyed; + +static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev); + +/** + * nes_alloc_mw + */ +static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { + struct nes_pd *nespd = to_nespd(ibpd); + struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_cqp_request *cqp_request; + struct nes_mr *nesmr; + struct ib_mw *ibmw; + struct nes_hw_cqp_wqe *cqp_wqe; + int ret; + u32 stag; + u32 stag_index = 0; + u32 next_stag_index = 0; + u32 driver_key = 0; + u8 stag_key = 0; + + get_random_bytes(&next_stag_index, sizeof(next_stag_index)); + stag_key = (u8)next_stag_index; + + driver_key = 0; + + next_stag_index >>= 8; + next_stag_index %= nesadapter->max_mr; + + ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, + nesadapter->max_mr, &stag_index, &next_stag_index); + if (ret) { + return ERR_PTR(ret); + } + + nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); + if (!nesmr) { + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + return ERR_PTR(-ENOMEM); + } + + stag = stag_index << 8; + stag |= driver_key; + stag += (u32)stag_key; + + nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n", + stag, stag_index); + + /* Register the region with the adapter */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + kfree(nesmr); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + return ERR_PTR(-ENOMEM); + } + + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = + cpu_to_le32( NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_RIGHTS_REMOTE_READ | + NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_VA_TO | + NES_CQP_STAG_REM_ACC_EN); + + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request); + + /* Wait for CQP */ + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u," + " CQP Major:Minor codes = 0x%04X:0x%04X.\n", + stag, ret, cqp_request->major_code, cqp_request->minor_code); + if ((!ret) || (cqp_request->major_code)) { + nes_put_cqp_request(nesdev, cqp_request); + kfree(nesmr); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + if (!ret) { + return ERR_PTR(-ETIME); + } else { + return ERR_PTR(-ENOMEM); + } + } + nes_put_cqp_request(nesdev, cqp_request); + + nesmr->ibmw.rkey = stag; + nesmr->mode = IWNES_MEMREG_TYPE_MW; + ibmw = &nesmr->ibmw; + nesmr->pbl_4k = 0; + nesmr->pbls_used = 0; + + return ibmw; +} + + +/** + * nes_dealloc_mw + */ +static int nes_dealloc_mw(struct ib_mw *ibmw) +{ + struct nes_mr *nesmr = to_nesmw(ibmw); + struct nes_vnic *nesvnic = to_nesvnic(ibmw->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + int err = 0; + int ret; + + /* Deallocate the window with the adapter */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); + return -ENOMEM; + } + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, NES_CQP_DEALLOCATE_STAG); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ibmw->rkey); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request); + + /* Wait for CQP */ + nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X to complete.\n", + ibmw->rkey); + ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_MR, "Deallocate STag completed, wait_event_timeout ret = %u," + " CQP Major:Minor codes = 0x%04X:0x%04X.\n", + ret, cqp_request->major_code, cqp_request->minor_code); + if (!ret) + err = -ETIME; + else if (cqp_request->major_code) + err = -EIO; + + nes_put_cqp_request(nesdev, cqp_request); + + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + (ibmw->rkey & 0x0fffff00) >> 8); + kfree(nesmr); + + return err; +} + + +/** + * nes_bind_mw + */ +static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw, + struct ib_mw_bind *ibmw_bind) +{ + u64 u64temp; + struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); + struct nes_device *nesdev = nesvnic->nesdev; + /* struct nes_mr *nesmr = to_nesmw(ibmw); */ + struct nes_qp *nesqp = to_nesqp(ibqp); + struct nes_hw_qp_wqe *wqe; + unsigned long flags = 0; + u32 head; + u32 wqe_misc = 0; + u32 qsize; + + if (nesqp->ibqp_state > IB_QPS_RTS) + return -EINVAL; + + spin_lock_irqsave(&nesqp->lock, flags); + + head = nesqp->hwqp.sq_head; + qsize = nesqp->hwqp.sq_tail; + + /* Check for SQ overflow */ + if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { + spin_unlock_irqrestore(&nesqp->lock, flags); + return -ENOMEM; + } + + wqe = &nesqp->hwqp.sq_vbase[head]; + /* nes_debug(NES_DBG_MR, "processing sq wqe at %p, head = %u.\n", wqe, head); */ + nes_fill_init_qp_wqe(wqe, nesqp, head); + u64temp = ibmw_bind->wr_id; + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, u64temp); + wqe_misc = NES_IWARP_SQ_OP_BIND; + + wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; + + if (ibmw_bind->send_flags & IB_SEND_SIGNALED) + wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL; + + if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_WRITE) { + wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE; + } + if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_READ) { + wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ; + } + + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX, ibmw_bind->mr->lkey); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX, + ibmw_bind->length); + wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0; + u64temp = (u64)ibmw_bind->addr; + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp); + + head++; + if (head >= qsize) + head = 0; + + nesqp->hwqp.sq_head = head; + barrier(); + + nes_write32(nesdev->regs+NES_WQE_ALLOC, + (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); + + spin_unlock_irqrestore(&nesqp->lock, flags); + + return 0; +} + + +/* + * nes_alloc_fast_mr + */ +static int alloc_fast_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, + u32 stag, u32 page_count) +{ + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + unsigned long flags; + int ret; + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 opcode = 0; + u16 major_code; + u64 region_length = page_count * PAGE_SIZE; + + + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); + return -ENOMEM; + } + nes_debug(NES_DBG_MR, "alloc_fast_reg_mr: page_count = %d, " + "region_length = %llu\n", + page_count, region_length); + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + if (nesadapter->free_4kpbl > 0) { + nesadapter->free_4kpbl--; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } else { + /* No 4kpbl's available: */ + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + nes_debug(NES_DBG_MR, "Out of Pbls\n"); + nes_free_cqp_request(nesdev, cqp_request); + return -ENOMEM; + } + + opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_MR | + NES_CQP_STAG_PBL_BLK_SIZE | NES_CQP_STAG_VA_TO | + NES_CQP_STAG_REM_ACC_EN; + /* + * The current OFED API does not support the zero based TO option. + * If added then need to changed the NES_CQP_STAG_VA* option. Also, + * the API does not support that ability to have the MR set for local + * access only when created and not allow the SQ op to override. Given + * this the remote enable must be set here. + */ + + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, 1); + + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] = + cpu_to_le32((u32)(region_length >> 8) & 0xff000000); + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |= + cpu_to_le32(nespd->pd_id & 0x00007fff); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag); + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, 0); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, 0); + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, 0); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (page_count * 8)); + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE); + barrier(); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request); + + /* Wait for CQP */ + ret = wait_event_timeout(cqp_request->waitq, + (0 != cqp_request->request_done), + NES_EVENT_TIMEOUT); + + nes_debug(NES_DBG_MR, "Allocate STag 0x%08X completed, " + "wait_event_timeout ret = %u, CQP Major:Minor codes = " + "0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code, + cqp_request->minor_code); + major_code = cqp_request->major_code; + nes_put_cqp_request(nesdev, cqp_request); + + if (!ret || major_code) { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_4kpbl++; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } + + if (!ret) + return -ETIME; + else if (major_code) + return -EIO; + return 0; +} + +/* + * nes_alloc_fast_reg_mr + */ +static struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int max_page_list_len) +{ + struct nes_pd *nespd = to_nespd(ibpd); + struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + + u32 next_stag_index; + u8 stag_key = 0; + u32 driver_key = 0; + int err = 0; + u32 stag_index = 0; + struct nes_mr *nesmr; + u32 stag; + int ret; + struct ib_mr *ibmr; +/* + * Note: Set to always use a fixed length single page entry PBL. This is to allow + * for the fast_reg_mr operation to always know the size of the PBL. + */ + if (max_page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) + return ERR_PTR(-E2BIG); + + get_random_bytes(&next_stag_index, sizeof(next_stag_index)); + stag_key = (u8)next_stag_index; + next_stag_index >>= 8; + next_stag_index %= nesadapter->max_mr; + + err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, + nesadapter->max_mr, &stag_index, + &next_stag_index); + if (err) + return ERR_PTR(err); + + nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); + if (!nesmr) { + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + return ERR_PTR(-ENOMEM); + } + + stag = stag_index << 8; + stag |= driver_key; + stag += (u32)stag_key; + + nes_debug(NES_DBG_MR, "Allocating STag 0x%08X index = 0x%08X\n", + stag, stag_index); + + ret = alloc_fast_reg_mr(nesdev, nespd, stag, max_page_list_len); + + if (ret == 0) { + nesmr->ibmr.rkey = stag; + nesmr->ibmr.lkey = stag; + nesmr->mode = IWNES_MEMREG_TYPE_FMEM; + ibmr = &nesmr->ibmr; + } else { + kfree(nesmr); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + ibmr = ERR_PTR(-ENOMEM); + } + return ibmr; +} + +/* + * nes_alloc_fast_reg_page_list + */ +static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list( + struct ib_device *ibdev, + int page_list_len) +{ + struct nes_vnic *nesvnic = to_nesvnic(ibdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct ib_fast_reg_page_list *pifrpl; + struct nes_ib_fast_reg_page_list *pnesfrpl; + + if (page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) + return ERR_PTR(-E2BIG); + /* + * Allocate the ib_fast_reg_page_list structure, the + * nes_fast_bpl structure, and the PLB table. + */ + pnesfrpl = kmalloc(sizeof(struct nes_ib_fast_reg_page_list) + + page_list_len * sizeof(u64), GFP_KERNEL); + + if (!pnesfrpl) + return ERR_PTR(-ENOMEM); + + pifrpl = &pnesfrpl->ibfrpl; + pifrpl->page_list = &pnesfrpl->pbl; + pifrpl->max_page_list_len = page_list_len; + /* + * Allocate the WQE PBL + */ + pnesfrpl->nes_wqe_pbl.kva = pci_alloc_consistent(nesdev->pcidev, + page_list_len * sizeof(u64), + &pnesfrpl->nes_wqe_pbl.paddr); + + if (!pnesfrpl->nes_wqe_pbl.kva) { + kfree(pnesfrpl); + return ERR_PTR(-ENOMEM); + } + nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, " + "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, " + "pbl.paddr = %llx\n", pnesfrpl, &pnesfrpl->ibfrpl, + pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva, + (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr); + + return pifrpl; +} + +/* + * nes_free_fast_reg_page_list + */ +static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl) +{ + struct nes_vnic *nesvnic = to_nesvnic(pifrpl->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_ib_fast_reg_page_list *pnesfrpl; + + pnesfrpl = container_of(pifrpl, struct nes_ib_fast_reg_page_list, ibfrpl); + /* + * Free the WQE PBL. + */ + pci_free_consistent(nesdev->pcidev, + pifrpl->max_page_list_len * sizeof(u64), + pnesfrpl->nes_wqe_pbl.kva, + pnesfrpl->nes_wqe_pbl.paddr); + /* + * Free the PBL structure + */ + kfree(pnesfrpl); +} + +/** + * nes_query_device + */ +static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *props) +{ + struct nes_vnic *nesvnic = to_nesvnic(ibdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_ib_device *nesibdev = nesvnic->nesibdev; + + memset(props, 0, sizeof(*props)); + memcpy(&props->sys_image_guid, nesvnic->netdev->dev_addr, 6); + + props->fw_ver = nesdev->nesadapter->firmware_version; + props->device_cap_flags = nesdev->nesadapter->device_cap_flags; + props->vendor_id = nesdev->nesadapter->vendor_id; + props->vendor_part_id = nesdev->nesadapter->vendor_part_id; + props->hw_ver = nesdev->nesadapter->hw_rev; + props->max_mr_size = 0x80000000; + props->max_qp = nesibdev->max_qp; + props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2; + props->max_sge = nesdev->nesadapter->max_sge; + props->max_cq = nesibdev->max_cq; + props->max_cqe = nesdev->nesadapter->max_cqe; + props->max_mr = nesibdev->max_mr; + props->max_mw = nesibdev->max_mr; + props->max_pd = nesibdev->max_pd; + props->max_sge_rd = 1; + switch (nesdev->nesadapter->max_irrq_wr) { + case 0: + props->max_qp_rd_atom = 2; + break; + case 1: + props->max_qp_rd_atom = 8; + break; + case 2: + props->max_qp_rd_atom = 32; + break; + case 3: + props->max_qp_rd_atom = 64; + break; + default: + props->max_qp_rd_atom = 0; + } + props->max_qp_init_rd_atom = props->max_qp_rd_atom; + props->atomic_cap = IB_ATOMIC_NONE; + props->max_map_per_fmr = 1; + + return 0; +} + + +/** + * nes_query_port + */ +static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) +{ + struct nes_vnic *nesvnic = to_nesvnic(ibdev); + struct net_device *netdev = nesvnic->netdev; + + memset(props, 0, sizeof(*props)); + + props->max_mtu = IB_MTU_4096; + + if (netdev->mtu >= 4096) + props->active_mtu = IB_MTU_4096; + else if (netdev->mtu >= 2048) + props->active_mtu = IB_MTU_2048; + else if (netdev->mtu >= 1024) + props->active_mtu = IB_MTU_1024; + else if (netdev->mtu >= 512) + props->active_mtu = IB_MTU_512; + else + props->active_mtu = IB_MTU_256; + + props->lid = 1; + props->lmc = 0; + props->sm_lid = 0; + props->sm_sl = 0; + if (netif_queue_stopped(netdev)) + props->state = IB_PORT_DOWN; + else if (nesvnic->linkup) + props->state = IB_PORT_ACTIVE; + else + props->state = IB_PORT_DOWN; + props->phys_state = 0; + props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | + IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; + props->gid_tbl_len = 1; + props->pkey_tbl_len = 1; + props->qkey_viol_cntr = 0; + props->active_width = IB_WIDTH_4X; + props->active_speed = IB_SPEED_SDR; + props->max_msg_sz = 0x80000000; + + return 0; +} + + +/** + * nes_query_pkey + */ +static int nes_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) +{ + *pkey = 0; + return 0; +} + + +/** + * nes_query_gid + */ +static int nes_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + struct nes_vnic *nesvnic = to_nesvnic(ibdev); + + memset(&(gid->raw[0]), 0, sizeof(gid->raw)); + memcpy(&(gid->raw[0]), nesvnic->netdev->dev_addr, 6); + + return 0; +} + + +/** + * nes_alloc_ucontext - Allocate the user context data structure. This keeps track + * of all objects associated with a particular user-mode client. + */ +static struct ib_ucontext *nes_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct nes_vnic *nesvnic = to_nesvnic(ibdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_alloc_ucontext_req req; + struct nes_alloc_ucontext_resp uresp; + struct nes_ucontext *nes_ucontext; + struct nes_ib_device *nesibdev = nesvnic->nesibdev; + + + if (ib_copy_from_udata(&req, udata, sizeof(struct nes_alloc_ucontext_req))) { + printk(KERN_ERR PFX "Invalid structure size on allocate user context.\n"); + return ERR_PTR(-EINVAL); + } + + if (req.userspace_ver != NES_ABI_USERSPACE_VER) { + printk(KERN_ERR PFX "Invalid userspace driver version detected. Detected version %d, should be %d\n", + req.userspace_ver, NES_ABI_USERSPACE_VER); + return ERR_PTR(-EINVAL); + } + + + memset(&uresp, 0, sizeof uresp); + + uresp.max_qps = nesibdev->max_qp; + uresp.max_pds = nesibdev->max_pd; + uresp.wq_size = nesdev->nesadapter->max_qp_wr * 2; + uresp.virtwq = nesadapter->virtwq; + uresp.kernel_ver = NES_ABI_KERNEL_VER; + + nes_ucontext = kzalloc(sizeof *nes_ucontext, GFP_KERNEL); + if (!nes_ucontext) + return ERR_PTR(-ENOMEM); + + nes_ucontext->nesdev = nesdev; + nes_ucontext->mmap_wq_offset = uresp.max_pds; + nes_ucontext->mmap_cq_offset = nes_ucontext->mmap_wq_offset + + ((sizeof(struct nes_hw_qp_wqe) * uresp.max_qps * 2) + PAGE_SIZE-1) / + PAGE_SIZE; + + + if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { + kfree(nes_ucontext); + return ERR_PTR(-EFAULT); + } + + INIT_LIST_HEAD(&nes_ucontext->cq_reg_mem_list); + INIT_LIST_HEAD(&nes_ucontext->qp_reg_mem_list); + atomic_set(&nes_ucontext->usecnt, 1); + return &nes_ucontext->ibucontext; +} + + +/** + * nes_dealloc_ucontext + */ +static int nes_dealloc_ucontext(struct ib_ucontext *context) +{ + /* struct nes_vnic *nesvnic = to_nesvnic(context->device); */ + /* struct nes_device *nesdev = nesvnic->nesdev; */ + struct nes_ucontext *nes_ucontext = to_nesucontext(context); + + if (!atomic_dec_and_test(&nes_ucontext->usecnt)) + return 0; + kfree(nes_ucontext); + return 0; +} + + +/** + * nes_mmap + */ +static int nes_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + unsigned long index; + struct nes_vnic *nesvnic = to_nesvnic(context->device); + struct nes_device *nesdev = nesvnic->nesdev; + /* struct nes_adapter *nesadapter = nesdev->nesadapter; */ + struct nes_ucontext *nes_ucontext; + struct nes_qp *nesqp; + + nes_ucontext = to_nesucontext(context); + + + if (vma->vm_pgoff >= nes_ucontext->mmap_wq_offset) { + index = (vma->vm_pgoff - nes_ucontext->mmap_wq_offset) * PAGE_SIZE; + index /= ((sizeof(struct nes_hw_qp_wqe) * nesdev->nesadapter->max_qp_wr * 2) + + PAGE_SIZE-1) & (~(PAGE_SIZE-1)); + if (!test_bit(index, nes_ucontext->allocated_wqs)) { + nes_debug(NES_DBG_MMAP, "wq %lu not allocated\n", index); + return -EFAULT; + } + nesqp = nes_ucontext->mmap_nesqp[index]; + if (nesqp == NULL) { + nes_debug(NES_DBG_MMAP, "wq %lu has a NULL QP base.\n", index); + return -EFAULT; + } + if (remap_pfn_range(vma, vma->vm_start, + virt_to_phys(nesqp->hwqp.sq_vbase) >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot)) { + nes_debug(NES_DBG_MMAP, "remap_pfn_range failed.\n"); + return -EAGAIN; + } + vma->vm_private_data = nesqp; + return 0; + } else { + index = vma->vm_pgoff; + if (!test_bit(index, nes_ucontext->allocated_doorbells)) + return -EFAULT; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (io_remap_pfn_range(vma, vma->vm_start, + (nesdev->doorbell_start + + ((nes_ucontext->mmap_db_index[index] - nesdev->base_doorbell_index) * 4096)) + >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + vma->vm_private_data = nes_ucontext; + return 0; + } + + return -ENOSYS; +} + + +/** + * nes_alloc_pd + */ +static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, struct ib_udata *udata) +{ + struct nes_pd *nespd; + struct nes_vnic *nesvnic = to_nesvnic(ibdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_ucontext *nesucontext; + struct nes_alloc_pd_resp uresp; + u32 pd_num = 0; + int err; + + nes_debug(NES_DBG_PD, "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n", + nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, context, + netdev_refcnt_read(nesvnic->netdev)); + + err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds, + nesadapter->max_pd, &pd_num, &nesadapter->next_pd); + if (err) { + return ERR_PTR(err); + } + + nespd = kzalloc(sizeof (struct nes_pd), GFP_KERNEL); + if (!nespd) { + nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); + return ERR_PTR(-ENOMEM); + } + + nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n", + nespd, nesvnic->nesibdev->ibdev.name); + + nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + nesadapter->base_pd; + + if (context) { + nesucontext = to_nesucontext(context); + nespd->mmap_db_index = find_next_zero_bit(nesucontext->allocated_doorbells, + NES_MAX_USER_DB_REGIONS, nesucontext->first_free_db); + nes_debug(NES_DBG_PD, "find_first_zero_biton doorbells returned %u, mapping pd_id %u.\n", + nespd->mmap_db_index, nespd->pd_id); + if (nespd->mmap_db_index >= NES_MAX_USER_DB_REGIONS) { + nes_debug(NES_DBG_PD, "mmap_db_index > MAX\n"); + nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); + kfree(nespd); + return ERR_PTR(-ENOMEM); + } + + uresp.pd_id = nespd->pd_id; + uresp.mmap_db_index = nespd->mmap_db_index; + if (ib_copy_to_udata(udata, &uresp, sizeof (struct nes_alloc_pd_resp))) { + nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); + kfree(nespd); + return ERR_PTR(-EFAULT); + } + + set_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells); + nesucontext->mmap_db_index[nespd->mmap_db_index] = nespd->pd_id; + nesucontext->first_free_db = nespd->mmap_db_index + 1; + } + + nes_debug(NES_DBG_PD, "PD%u structure located @%p.\n", nespd->pd_id, nespd); + return &nespd->ibpd; +} + + +/** + * nes_dealloc_pd + */ +static int nes_dealloc_pd(struct ib_pd *ibpd) +{ + struct nes_ucontext *nesucontext; + struct nes_pd *nespd = to_nespd(ibpd); + struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + + if ((ibpd->uobject) && (ibpd->uobject->context)) { + nesucontext = to_nesucontext(ibpd->uobject->context); + nes_debug(NES_DBG_PD, "Clearing bit %u from allocated doorbells\n", + nespd->mmap_db_index); + clear_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells); + nesucontext->mmap_db_index[nespd->mmap_db_index] = 0; + if (nesucontext->first_free_db > nespd->mmap_db_index) { + nesucontext->first_free_db = nespd->mmap_db_index; + } + } + + nes_debug(NES_DBG_PD, "Deallocating PD%u structure located @%p.\n", + nespd->pd_id, nespd); + nes_free_resource(nesadapter, nesadapter->allocated_pds, + (nespd->pd_id-nesadapter->base_pd)>>(PAGE_SHIFT-12)); + kfree(nespd); + + return 0; +} + + +/** + * nes_create_ah + */ +static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + return ERR_PTR(-ENOSYS); +} + + +/** + * nes_destroy_ah + */ +static int nes_destroy_ah(struct ib_ah *ah) +{ + return -ENOSYS; +} + + +/** + * nes_get_encoded_size + */ +static inline u8 nes_get_encoded_size(int *size) +{ + u8 encoded_size = 0; + if (*size <= 32) { + *size = 32; + encoded_size = 1; + } else if (*size <= 128) { + *size = 128; + encoded_size = 2; + } else if (*size <= 512) { + *size = 512; + encoded_size = 3; + } + return (encoded_size); +} + + + +/** + * nes_setup_virt_qp + */ +static int nes_setup_virt_qp(struct nes_qp *nesqp, struct nes_pbl *nespbl, + struct nes_vnic *nesvnic, int sq_size, int rq_size) +{ + unsigned long flags; + void *mem; + __le64 *pbl = NULL; + __le64 *tpbl; + __le64 *pblbuffer; + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 pbl_entries; + u8 rq_pbl_entries; + u8 sq_pbl_entries; + + pbl_entries = nespbl->pbl_size >> 3; + nes_debug(NES_DBG_QP, "Userspace PBL, pbl_size=%u, pbl_entries = %d pbl_vbase=%p, pbl_pbase=%lx\n", + nespbl->pbl_size, pbl_entries, + (void *)nespbl->pbl_vbase, + (unsigned long) nespbl->pbl_pbase); + pbl = (__le64 *) nespbl->pbl_vbase; /* points to first pbl entry */ + /* now lets set the sq_vbase as well as rq_vbase addrs we will assign */ + /* the first pbl to be fro the rq_vbase... */ + rq_pbl_entries = (rq_size * sizeof(struct nes_hw_qp_wqe)) >> 12; + sq_pbl_entries = (sq_size * sizeof(struct nes_hw_qp_wqe)) >> 12; + nesqp->hwqp.sq_pbase = (le32_to_cpu(((__le32 *)pbl)[0])) | ((u64)((le32_to_cpu(((__le32 *)pbl)[1]))) << 32); + if (!nespbl->page) { + nes_debug(NES_DBG_QP, "QP nespbl->page is NULL \n"); + kfree(nespbl); + return -ENOMEM; + } + + nesqp->hwqp.sq_vbase = kmap(nespbl->page); + nesqp->page = nespbl->page; + if (!nesqp->hwqp.sq_vbase) { + nes_debug(NES_DBG_QP, "QP sq_vbase kmap failed\n"); + kfree(nespbl); + return -ENOMEM; + } + + /* Now to get to sq.. we need to calculate how many */ + /* PBL entries were used by the rq.. */ + pbl += sq_pbl_entries; + nesqp->hwqp.rq_pbase = (le32_to_cpu(((__le32 *)pbl)[0])) | ((u64)((le32_to_cpu(((__le32 *)pbl)[1]))) << 32); + /* nesqp->hwqp.rq_vbase = bus_to_virt(*pbl); */ + /*nesqp->hwqp.rq_vbase = phys_to_virt(*pbl); */ + + nes_debug(NES_DBG_QP, "QP sq_vbase= %p sq_pbase=%lx rq_vbase=%p rq_pbase=%lx\n", + nesqp->hwqp.sq_vbase, (unsigned long) nesqp->hwqp.sq_pbase, + nesqp->hwqp.rq_vbase, (unsigned long) nesqp->hwqp.rq_pbase); + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + if (!nesadapter->free_256pbl) { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + kunmap(nesqp->page); + kfree(nespbl); + return -ENOMEM; + } + nesadapter->free_256pbl--; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + + nesqp->pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 256, &nesqp->pbl_pbase); + pblbuffer = nesqp->pbl_vbase; + if (!nesqp->pbl_vbase) { + /* memory allocated during nes_reg_user_mr() */ + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + kfree(nespbl); + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_256pbl++; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + kunmap(nesqp->page); + return -ENOMEM; + } + memset(nesqp->pbl_vbase, 0, 256); + /* fill in the page address in the pbl buffer.. */ + tpbl = pblbuffer + 16; + pbl = (__le64 *)nespbl->pbl_vbase; + while (sq_pbl_entries--) + *tpbl++ = *pbl++; + tpbl = pblbuffer; + while (rq_pbl_entries--) + *tpbl++ = *pbl++; + + /* done with memory allocated during nes_reg_user_mr() */ + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + kfree(nespbl); + + nesqp->qp_mem_size = + max((u32)sizeof(struct nes_qp_context), ((u32)256)) + 256; /* this is Q2 */ + /* Round up to a multiple of a page */ + nesqp->qp_mem_size += PAGE_SIZE - 1; + nesqp->qp_mem_size &= ~(PAGE_SIZE - 1); + + mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size, + &nesqp->hwqp.q2_pbase); + + if (!mem) { + pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase); + nesqp->pbl_vbase = NULL; + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_256pbl++; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + kunmap(nesqp->page); + return -ENOMEM; + } + nesqp->sq_kmapped = 1; + nesqp->hwqp.q2_vbase = mem; + mem += 256; + memset(nesqp->hwqp.q2_vbase, 0, 256); + nesqp->nesqp_context = mem; + memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context)); + nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256; + + return 0; +} + + +/** + * nes_setup_mmap_qp + */ +static int nes_setup_mmap_qp(struct nes_qp *nesqp, struct nes_vnic *nesvnic, + int sq_size, int rq_size) +{ + void *mem; + struct nes_device *nesdev = nesvnic->nesdev; + + nesqp->qp_mem_size = (sizeof(struct nes_hw_qp_wqe) * sq_size) + + (sizeof(struct nes_hw_qp_wqe) * rq_size) + + max((u32)sizeof(struct nes_qp_context), ((u32)256)) + + 256; /* this is Q2 */ + /* Round up to a multiple of a page */ + nesqp->qp_mem_size += PAGE_SIZE - 1; + nesqp->qp_mem_size &= ~(PAGE_SIZE - 1); + + mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size, + &nesqp->hwqp.sq_pbase); + if (!mem) + return -ENOMEM; + nes_debug(NES_DBG_QP, "PCI consistent memory for " + "host descriptor rings located @ %p (pa = 0x%08lX.) size = %u.\n", + mem, (unsigned long)nesqp->hwqp.sq_pbase, nesqp->qp_mem_size); + + memset(mem, 0, nesqp->qp_mem_size); + + nesqp->hwqp.sq_vbase = mem; + mem += sizeof(struct nes_hw_qp_wqe) * sq_size; + + nesqp->hwqp.rq_vbase = mem; + nesqp->hwqp.rq_pbase = nesqp->hwqp.sq_pbase + + sizeof(struct nes_hw_qp_wqe) * sq_size; + mem += sizeof(struct nes_hw_qp_wqe) * rq_size; + + nesqp->hwqp.q2_vbase = mem; + nesqp->hwqp.q2_pbase = nesqp->hwqp.rq_pbase + + sizeof(struct nes_hw_qp_wqe) * rq_size; + mem += 256; + memset(nesqp->hwqp.q2_vbase, 0, 256); + + nesqp->nesqp_context = mem; + nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256; + memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context)); + return 0; +} + + +/** + * nes_free_qp_mem() is to free up the qp's pci_alloc_consistent() memory. + */ +static inline void nes_free_qp_mem(struct nes_device *nesdev, + struct nes_qp *nesqp, int virt_wqs) +{ + unsigned long flags; + struct nes_adapter *nesadapter = nesdev->nesadapter; + if (!virt_wqs) { + pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, + nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase); + }else { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_256pbl++; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase); + pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase ); + nesqp->pbl_vbase = NULL; + if (nesqp->sq_kmapped) { + nesqp->sq_kmapped = 0; + kunmap(nesqp->page); + } + } +} + + +/** + * nes_create_qp + */ +static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, struct ib_udata *udata) +{ + u64 u64temp= 0; + u64 u64nesqp = 0; + struct nes_pd *nespd = to_nespd(ibpd); + struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_qp *nesqp; + struct nes_cq *nescq; + struct nes_ucontext *nes_ucontext; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + struct nes_create_qp_req req; + struct nes_create_qp_resp uresp; + struct nes_pbl *nespbl = NULL; + u32 qp_num = 0; + u32 opcode = 0; + /* u32 counter = 0; */ + void *mem; + unsigned long flags; + int ret; + int err; + int virt_wqs = 0; + int sq_size; + int rq_size; + u8 sq_encoded_size; + u8 rq_encoded_size; + /* int counter; */ + + if (init_attr->create_flags) + return ERR_PTR(-EINVAL); + + atomic_inc(&qps_created); + switch (init_attr->qp_type) { + case IB_QPT_RC: + if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) { + init_attr->cap.max_inline_data = 0; + } else { + init_attr->cap.max_inline_data = 64; + } + sq_size = init_attr->cap.max_send_wr; + rq_size = init_attr->cap.max_recv_wr; + + /* check if the encoded sizes are OK or not... */ + sq_encoded_size = nes_get_encoded_size(&sq_size); + rq_encoded_size = nes_get_encoded_size(&rq_size); + + if ((!sq_encoded_size) || (!rq_encoded_size)) { + nes_debug(NES_DBG_QP, "ERROR bad rq (%u) or sq (%u) size\n", + rq_size, sq_size); + return ERR_PTR(-EINVAL); + } + + init_attr->cap.max_send_wr = sq_size -2; + init_attr->cap.max_recv_wr = rq_size -1; + nes_debug(NES_DBG_QP, "RQ size=%u, SQ Size=%u\n", rq_size, sq_size); + + ret = nes_alloc_resource(nesadapter, nesadapter->allocated_qps, + nesadapter->max_qp, &qp_num, &nesadapter->next_qp); + if (ret) { + return ERR_PTR(ret); + } + + /* Need 512 (actually now 1024) byte alignment on this structure */ + mem = kzalloc(sizeof(*nesqp)+NES_SW_CONTEXT_ALIGN-1, GFP_KERNEL); + if (!mem) { + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + nes_debug(NES_DBG_QP, "Unable to allocate QP\n"); + return ERR_PTR(-ENOMEM); + } + u64nesqp = (unsigned long)mem; + u64nesqp += ((u64)NES_SW_CONTEXT_ALIGN) - 1; + u64temp = ((u64)NES_SW_CONTEXT_ALIGN) - 1; + u64nesqp &= ~u64temp; + nesqp = (struct nes_qp *)(unsigned long)u64nesqp; + /* nes_debug(NES_DBG_QP, "nesqp=%p, allocated buffer=%p. Rounded to closest %u\n", + nesqp, mem, NES_SW_CONTEXT_ALIGN); */ + nesqp->allocated_buffer = mem; + + if (udata) { + if (ib_copy_from_udata(&req, udata, sizeof(struct nes_create_qp_req))) { + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + kfree(nesqp->allocated_buffer); + nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n"); + return NULL; + } + if (req.user_wqe_buffers) { + virt_wqs = 1; + } + if ((ibpd->uobject) && (ibpd->uobject->context)) { + nesqp->user_mode = 1; + nes_ucontext = to_nesucontext(ibpd->uobject->context); + if (virt_wqs) { + err = 1; + list_for_each_entry(nespbl, &nes_ucontext->qp_reg_mem_list, list) { + if (nespbl->user_base == (unsigned long )req.user_wqe_buffers) { + list_del(&nespbl->list); + err = 0; + nes_debug(NES_DBG_QP, "Found PBL for virtual QP. nespbl=%p. user_base=0x%lx\n", + nespbl, nespbl->user_base); + break; + } + } + if (err) { + nes_debug(NES_DBG_QP, "Didn't Find PBL for virtual QP. address = %llx.\n", + (long long unsigned int)req.user_wqe_buffers); + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + kfree(nesqp->allocated_buffer); + return ERR_PTR(-EFAULT); + } + } + + nes_ucontext = to_nesucontext(ibpd->uobject->context); + nesqp->mmap_sq_db_index = + find_next_zero_bit(nes_ucontext->allocated_wqs, + NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq); + /* nes_debug(NES_DBG_QP, "find_first_zero_biton wqs returned %u\n", + nespd->mmap_db_index); */ + if (nesqp->mmap_sq_db_index >= NES_MAX_USER_WQ_REGIONS) { + nes_debug(NES_DBG_QP, + "db index > max user regions, failing create QP\n"); + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + if (virt_wqs) { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + kfree(nespbl); + } + kfree(nesqp->allocated_buffer); + return ERR_PTR(-ENOMEM); + } + set_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs); + nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = nesqp; + nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index + 1; + } else { + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + kfree(nesqp->allocated_buffer); + return ERR_PTR(-EFAULT); + } + } + err = (!virt_wqs) ? nes_setup_mmap_qp(nesqp, nesvnic, sq_size, rq_size) : + nes_setup_virt_qp(nesqp, nespbl, nesvnic, sq_size, rq_size); + if (err) { + nes_debug(NES_DBG_QP, + "error geting qp mem code = %d\n", err); + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + kfree(nesqp->allocated_buffer); + return ERR_PTR(-ENOMEM); + } + + nesqp->hwqp.sq_size = sq_size; + nesqp->hwqp.sq_encoded_size = sq_encoded_size; + nesqp->hwqp.sq_head = 1; + nesqp->hwqp.rq_size = rq_size; + nesqp->hwqp.rq_encoded_size = rq_encoded_size; + /* nes_debug(NES_DBG_QP, "nesqp->nesqp_context_pbase = %p\n", + (void *)nesqp->nesqp_context_pbase); + */ + nesqp->hwqp.qp_id = qp_num; + nesqp->ibqp.qp_num = nesqp->hwqp.qp_id; + nesqp->nespd = nespd; + + nescq = to_nescq(init_attr->send_cq); + nesqp->nesscq = nescq; + nescq = to_nescq(init_attr->recv_cq); + nesqp->nesrcq = nescq; + + nesqp->nesqp_context->misc |= cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << + NES_QPCONTEXT_MISC_PCI_FCN_SHIFT); + nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.rq_encoded_size << + NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT); + nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.sq_encoded_size << + NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT); + if (!udata) { + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_PRIV_EN); + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_FAST_REGISTER_EN); + } + nesqp->nesqp_context->cqs = cpu_to_le32(nesqp->nesscq->hw_cq.cq_number + + ((u32)nesqp->nesrcq->hw_cq.cq_number << 16)); + u64temp = (u64)nesqp->hwqp.sq_pbase; + nesqp->nesqp_context->sq_addr_low = cpu_to_le32((u32)u64temp); + nesqp->nesqp_context->sq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); + + + if (!virt_wqs) { + u64temp = (u64)nesqp->hwqp.sq_pbase; + nesqp->nesqp_context->sq_addr_low = cpu_to_le32((u32)u64temp); + nesqp->nesqp_context->sq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); + u64temp = (u64)nesqp->hwqp.rq_pbase; + nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp); + nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); + } else { + u64temp = (u64)nesqp->pbl_pbase; + nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp); + nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); + } + + /* nes_debug(NES_DBG_QP, "next_qp_nic_index=%u, using nic_index=%d\n", + nesvnic->next_qp_nic_index, + nesvnic->qp_nic_index[nesvnic->next_qp_nic_index]); */ + spin_lock_irqsave(&nesdev->cqp.lock, flags); + nesqp->nesqp_context->misc2 |= cpu_to_le32( + (u32)nesvnic->qp_nic_index[nesvnic->next_qp_nic_index] << + NES_QPCONTEXT_MISC2_NIC_INDEX_SHIFT); + nesvnic->next_qp_nic_index++; + if ((nesvnic->next_qp_nic_index > 3) || + (nesvnic->qp_nic_index[nesvnic->next_qp_nic_index] == 0xf)) { + nesvnic->next_qp_nic_index = 0; + } + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + + nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32((u32)nesqp->nespd->pd_id << 16); + u64temp = (u64)nesqp->hwqp.q2_pbase; + nesqp->nesqp_context->q2_addr_low = cpu_to_le32((u32)u64temp); + nesqp->nesqp_context->q2_addr_high = cpu_to_le32((u32)(u64temp >> 32)); + nesqp->nesqp_context->aeq_token_low = cpu_to_le32((u32)((unsigned long)(nesqp))); + nesqp->nesqp_context->aeq_token_high = cpu_to_le32((u32)(upper_32_bits((unsigned long)(nesqp)))); + nesqp->nesqp_context->ird_ord_sizes = cpu_to_le32(NES_QPCONTEXT_ORDIRD_ALSMM | + NES_QPCONTEXT_ORDIRD_AAH | + ((((u32)nesadapter->max_irrq_wr) << + NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT) & NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK)); + if (disable_mpa_crc) { + nes_debug(NES_DBG_QP, "Disabling MPA crc checking due to module option.\n"); + nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(NES_QPCONTEXT_ORDIRD_RNMC); + } + + + /* Create the QP */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_QP, "Failed to get a cqp_request\n"); + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + nes_free_qp_mem(nesdev, nesqp,virt_wqs); + kfree(nesqp->allocated_buffer); + return ERR_PTR(-ENOMEM); + } + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + + if (!virt_wqs) { + opcode = NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP | + NES_CQP_QP_IWARP_STATE_IDLE; + } else { + opcode = NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_VIRT_WQS | + NES_CQP_QP_IWARP_STATE_IDLE; + } + opcode |= NES_CQP_QP_CQS_VALID; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); + + u64temp = (u64)nesqp->nesqp_context_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request); + + /* Wait for CQP */ + nes_debug(NES_DBG_QP, "Waiting for create iWARP QP%u to complete.\n", + nesqp->hwqp.qp_id); + ret = wait_event_timeout(cqp_request->waitq, + (cqp_request->request_done != 0), NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_QP, "Create iwarp QP%u completed, wait_event_timeout ret=%u," + " nesdev->cqp_head = %u, nesdev->cqp.sq_tail = %u," + " CQP Major:Minor codes = 0x%04X:0x%04X.\n", + nesqp->hwqp.qp_id, ret, nesdev->cqp.sq_head, nesdev->cqp.sq_tail, + cqp_request->major_code, cqp_request->minor_code); + if ((!ret) || (cqp_request->major_code)) { + nes_put_cqp_request(nesdev, cqp_request); + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + nes_free_qp_mem(nesdev, nesqp,virt_wqs); + kfree(nesqp->allocated_buffer); + if (!ret) { + return ERR_PTR(-ETIME); + } else { + return ERR_PTR(-EIO); + } + } + + nes_put_cqp_request(nesdev, cqp_request); + + if (ibpd->uobject) { + uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index; + uresp.actual_sq_size = sq_size; + uresp.actual_rq_size = rq_size; + uresp.qp_id = nesqp->hwqp.qp_id; + uresp.nes_drv_opt = nes_drv_opt; + if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + nes_free_qp_mem(nesdev, nesqp,virt_wqs); + kfree(nesqp->allocated_buffer); + return ERR_PTR(-EFAULT); + } + } + + nes_debug(NES_DBG_QP, "QP%u structure located @%p.Size = %u.\n", + nesqp->hwqp.qp_id, nesqp, (u32)sizeof(*nesqp)); + spin_lock_init(&nesqp->lock); + nes_add_ref(&nesqp->ibqp); + break; + default: + nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type); + return ERR_PTR(-EINVAL); + } + + nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR); + + /* update the QP table */ + nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp; + nes_debug(NES_DBG_QP, "netdev refcnt=%u\n", + netdev_refcnt_read(nesvnic->netdev)); + + return &nesqp->ibqp; +} + + +/** + * nes_clean_cq + */ +static void nes_clean_cq(struct nes_qp *nesqp, struct nes_cq *nescq) +{ + u32 cq_head; + u32 lo; + u32 hi; + u64 u64temp; + unsigned long flags = 0; + + spin_lock_irqsave(&nescq->lock, flags); + + cq_head = nescq->hw_cq.cq_head; + while (le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) { + rmb(); + lo = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]); + hi = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]); + u64temp = (((u64)hi) << 32) | ((u64)lo); + u64temp &= ~(NES_SW_CONTEXT_ALIGN-1); + if (u64temp == (u64)(unsigned long)nesqp) { + /* Zero the context value so cqe will be ignored */ + nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] = 0; + nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX] = 0; + } + + if (++cq_head >= nescq->hw_cq.cq_size) + cq_head = 0; + } + + spin_unlock_irqrestore(&nescq->lock, flags); +} + + +/** + * nes_destroy_qp + */ +static int nes_destroy_qp(struct ib_qp *ibqp) +{ + struct nes_qp *nesqp = to_nesqp(ibqp); + struct nes_ucontext *nes_ucontext; + struct ib_qp_attr attr; + struct iw_cm_id *cm_id; + struct iw_cm_event cm_event; + int ret = 0; + + atomic_inc(&sw_qps_destroyed); + nesqp->destroyed = 1; + + /* Blow away the connection if it exists. */ + if (nesqp->ibqp_state >= IB_QPS_INIT && nesqp->ibqp_state <= IB_QPS_RTS) { + /* if (nesqp->ibqp_state == IB_QPS_RTS) { */ + attr.qp_state = IB_QPS_ERR; + nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); + } + + if (((nesqp->ibqp_state == IB_QPS_INIT) || + (nesqp->ibqp_state == IB_QPS_RTR)) && (nesqp->cm_id)) { + cm_id = nesqp->cm_id; + cm_event.event = IW_CM_EVENT_CONNECT_REPLY; + cm_event.status = -ETIMEDOUT; + cm_event.local_addr = cm_id->local_addr; + cm_event.remote_addr = cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + + nes_debug(NES_DBG_QP, "Generating a CM Timeout Event for " + "QP%u. cm_id = %p, refcount = %u. \n", + nesqp->hwqp.qp_id, cm_id, atomic_read(&nesqp->refcount)); + + cm_id->rem_ref(cm_id); + ret = cm_id->event_handler(cm_id, &cm_event); + if (ret) + nes_debug(NES_DBG_QP, "OFA CM event_handler returned, ret=%d\n", ret); + } + + if (nesqp->user_mode) { + if ((ibqp->uobject)&&(ibqp->uobject->context)) { + nes_ucontext = to_nesucontext(ibqp->uobject->context); + clear_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs); + nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = NULL; + if (nes_ucontext->first_free_wq > nesqp->mmap_sq_db_index) { + nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index; + } + } + if (nesqp->pbl_pbase && nesqp->sq_kmapped) { + nesqp->sq_kmapped = 0; + kunmap(nesqp->page); + } + } else { + /* Clean any pending completions from the cq(s) */ + if (nesqp->nesscq) + nes_clean_cq(nesqp, nesqp->nesscq); + + if ((nesqp->nesrcq) && (nesqp->nesrcq != nesqp->nesscq)) + nes_clean_cq(nesqp, nesqp->nesrcq); + } + nes_rem_ref(&nesqp->ibqp); + return 0; +} + + +/** + * nes_create_cq + */ +static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, + int comp_vector, + struct ib_ucontext *context, struct ib_udata *udata) +{ + u64 u64temp; + struct nes_vnic *nesvnic = to_nesvnic(ibdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_cq *nescq; + struct nes_ucontext *nes_ucontext = NULL; + struct nes_cqp_request *cqp_request; + void *mem = NULL; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_pbl *nespbl = NULL; + struct nes_create_cq_req req; + struct nes_create_cq_resp resp; + u32 cq_num = 0; + u32 opcode = 0; + u32 pbl_entries = 1; + int err; + unsigned long flags; + int ret; + + if (entries > nesadapter->max_cqe) + return ERR_PTR(-EINVAL); + + err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs, + nesadapter->max_cq, &cq_num, &nesadapter->next_cq); + if (err) { + return ERR_PTR(err); + } + + nescq = kzalloc(sizeof(struct nes_cq), GFP_KERNEL); + if (!nescq) { + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + nes_debug(NES_DBG_CQ, "Unable to allocate nes_cq struct\n"); + return ERR_PTR(-ENOMEM); + } + + nescq->hw_cq.cq_size = max(entries + 1, 5); + nescq->hw_cq.cq_number = cq_num; + nescq->ibcq.cqe = nescq->hw_cq.cq_size - 1; + + + if (context) { + nes_ucontext = to_nesucontext(context); + if (ib_copy_from_udata(&req, udata, sizeof (struct nes_create_cq_req))) { + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(-EFAULT); + } + nesvnic->mcrq_ucontext = nes_ucontext; + nes_ucontext->mcrqf = req.mcrqf; + if (nes_ucontext->mcrqf) { + if (nes_ucontext->mcrqf & 0x80000000) + nescq->hw_cq.cq_number = nesvnic->nic.qp_id + 28 + 2 * ((nes_ucontext->mcrqf & 0xf) - 1); + else if (nes_ucontext->mcrqf & 0x40000000) + nescq->hw_cq.cq_number = nes_ucontext->mcrqf & 0xffff; + else + nescq->hw_cq.cq_number = nesvnic->mcrq_qp_id + nes_ucontext->mcrqf-1; + nescq->mcrqf = nes_ucontext->mcrqf; + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + } + nes_debug(NES_DBG_CQ, "CQ Virtual Address = %08lX, size = %u.\n", + (unsigned long)req.user_cq_buffer, entries); + err = 1; + list_for_each_entry(nespbl, &nes_ucontext->cq_reg_mem_list, list) { + if (nespbl->user_base == (unsigned long )req.user_cq_buffer) { + list_del(&nespbl->list); + err = 0; + nes_debug(NES_DBG_CQ, "Found PBL for virtual CQ. nespbl=%p.\n", + nespbl); + break; + } + } + if (err) { + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(-EFAULT); + } + + pbl_entries = nespbl->pbl_size >> 3; + nescq->cq_mem_size = 0; + } else { + nescq->cq_mem_size = nescq->hw_cq.cq_size * sizeof(struct nes_hw_cqe); + nes_debug(NES_DBG_CQ, "Attempting to allocate pci memory (%u entries, %u bytes) for CQ%u.\n", + entries, nescq->cq_mem_size, nescq->hw_cq.cq_number); + + /* allocate the physical buffer space */ + mem = pci_alloc_consistent(nesdev->pcidev, nescq->cq_mem_size, + &nescq->hw_cq.cq_pbase); + if (!mem) { + printk(KERN_ERR PFX "Unable to allocate pci memory for cq\n"); + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(-ENOMEM); + } + + memset(mem, 0, nescq->cq_mem_size); + nescq->hw_cq.cq_vbase = mem; + nescq->hw_cq.cq_head = 0; + nes_debug(NES_DBG_CQ, "CQ%u virtual address @ %p, phys = 0x%08X\n", + nescq->hw_cq.cq_number, nescq->hw_cq.cq_vbase, + (u32)nescq->hw_cq.cq_pbase); + } + + nescq->hw_cq.ce_handler = nes_iwarp_ce_handler; + spin_lock_init(&nescq->lock); + + /* send CreateCQ request to CQP */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n"); + if (!context) + pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, + nescq->hw_cq.cq_pbase); + else { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, + nespbl->pbl_vbase, nespbl->pbl_pbase); + kfree(nespbl); + } + + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(-ENOMEM); + } + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + + opcode = NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | + NES_CQP_CQ_CHK_OVERFLOW | + NES_CQP_CQ_CEQE_MASK | ((u32)nescq->hw_cq.cq_size << 16); + + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + + if (pbl_entries != 1) { + if (pbl_entries > 32) { + /* use 4k pbl */ + nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 4k PBL\n", pbl_entries); + if (nesadapter->free_4kpbl == 0) { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + nes_free_cqp_request(nesdev, cqp_request); + if (!context) + pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, + nescq->hw_cq.cq_pbase); + else { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, + nespbl->pbl_vbase, nespbl->pbl_pbase); + kfree(nespbl); + } + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(-ENOMEM); + } else { + opcode |= (NES_CQP_CQ_VIRT | NES_CQP_CQ_4KB_CHUNK); + nescq->virtual_cq = 2; + nesadapter->free_4kpbl--; + } + } else { + /* use 256 byte pbl */ + nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 256 byte PBL\n", pbl_entries); + if (nesadapter->free_256pbl == 0) { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + nes_free_cqp_request(nesdev, cqp_request); + if (!context) + pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, + nescq->hw_cq.cq_pbase); + else { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, + nespbl->pbl_vbase, nespbl->pbl_pbase); + kfree(nespbl); + } + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(-ENOMEM); + } else { + opcode |= NES_CQP_CQ_VIRT; + nescq->virtual_cq = 1; + nesadapter->free_256pbl--; + } + } + } + + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + (nescq->hw_cq.cq_number | ((u32)nesdev->ceq_index << 16))); + + if (context) { + if (pbl_entries != 1) + u64temp = (u64)nespbl->pbl_pbase; + else + u64temp = le64_to_cpu(nespbl->pbl_vbase[0]); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX, + nes_ucontext->mmap_db_index[0]); + } else { + u64temp = (u64)nescq->hw_cq.cq_pbase; + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0; + } + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; + u64temp = (u64)(unsigned long)&nescq->hw_cq; + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = + cpu_to_le32((u32)(u64temp >> 1)); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = + cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request); + + /* Wait for CQP */ + nes_debug(NES_DBG_CQ, "Waiting for create iWARP CQ%u to complete.\n", + nescq->hw_cq.cq_number); + ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), + NES_EVENT_TIMEOUT * 2); + nes_debug(NES_DBG_CQ, "Create iWARP CQ%u completed, wait_event_timeout ret = %d.\n", + nescq->hw_cq.cq_number, ret); + if ((!ret) || (cqp_request->major_code)) { + nes_put_cqp_request(nesdev, cqp_request); + if (!context) + pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, + nescq->hw_cq.cq_pbase); + else { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, + nespbl->pbl_vbase, nespbl->pbl_pbase); + kfree(nespbl); + } + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(-EIO); + } + nes_put_cqp_request(nesdev, cqp_request); + + if (context) { + /* free the nespbl */ + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + kfree(nespbl); + resp.cq_id = nescq->hw_cq.cq_number; + resp.cq_size = nescq->hw_cq.cq_size; + resp.mmap_db_index = 0; + if (ib_copy_to_udata(udata, &resp, sizeof resp)) { + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); + kfree(nescq); + return ERR_PTR(-EFAULT); + } + } + + return &nescq->ibcq; +} + + +/** + * nes_destroy_cq + */ +static int nes_destroy_cq(struct ib_cq *ib_cq) +{ + struct nes_cq *nescq; + struct nes_device *nesdev; + struct nes_vnic *nesvnic; + struct nes_adapter *nesadapter; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + unsigned long flags; + u32 opcode = 0; + int ret; + + if (ib_cq == NULL) + return 0; + + nescq = to_nescq(ib_cq); + nesvnic = to_nesvnic(ib_cq->device); + nesdev = nesvnic->nesdev; + nesadapter = nesdev->nesadapter; + + nes_debug(NES_DBG_CQ, "Destroy CQ%u\n", nescq->hw_cq.cq_number); + + /* Send DestroyCQ request to CQP */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n"); + return -ENOMEM; + } + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + opcode = NES_CQP_DESTROY_CQ | (nescq->hw_cq.cq_size << 16); + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + if (nescq->virtual_cq == 1) { + nesadapter->free_256pbl++; + if (nesadapter->free_256pbl > nesadapter->max_256pbl) { + printk(KERN_ERR PFX "%s: free 256B PBLs(%u) has exceeded the max(%u)\n", + __func__, nesadapter->free_256pbl, nesadapter->max_256pbl); + } + } else if (nescq->virtual_cq == 2) { + nesadapter->free_4kpbl++; + if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) { + printk(KERN_ERR PFX "%s: free 4K PBLs(%u) has exceeded the max(%u)\n", + __func__, nesadapter->free_4kpbl, nesadapter->max_4kpbl); + } + opcode |= NES_CQP_CQ_4KB_CHUNK; + } + + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + (nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16))); + if (!nescq->mcrqf) + nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request); + + /* Wait for CQP */ + nes_debug(NES_DBG_CQ, "Waiting for destroy iWARP CQ%u to complete.\n", + nescq->hw_cq.cq_number); + ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_CQ, "Destroy iWARP CQ%u completed, wait_event_timeout ret = %u," + " CQP Major:Minor codes = 0x%04X:0x%04X.\n", + nescq->hw_cq.cq_number, ret, cqp_request->major_code, + cqp_request->minor_code); + if (!ret) { + nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n", + nescq->hw_cq.cq_number); + ret = -ETIME; + } else if (cqp_request->major_code) { + nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n", + nescq->hw_cq.cq_number); + ret = -EIO; + } else { + ret = 0; + } + nes_put_cqp_request(nesdev, cqp_request); + + if (nescq->cq_mem_size) + pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, + nescq->hw_cq.cq_vbase, nescq->hw_cq.cq_pbase); + kfree(nescq); + + return ret; +} + +/** + * root_256 + */ +static u32 root_256(struct nes_device *nesdev, + struct nes_root_vpbl *root_vpbl, + struct nes_root_vpbl *new_root, + u16 pbl_count_4k) +{ + u64 leaf_pbl; + int i, j, k; + + if (pbl_count_4k == 1) { + new_root->pbl_vbase = pci_alloc_consistent(nesdev->pcidev, + 512, &new_root->pbl_pbase); + + if (new_root->pbl_vbase == NULL) + return 0; + + leaf_pbl = (u64)root_vpbl->pbl_pbase; + for (i = 0; i < 16; i++) { + new_root->pbl_vbase[i].pa_low = + cpu_to_le32((u32)leaf_pbl); + new_root->pbl_vbase[i].pa_high = + cpu_to_le32((u32)((((u64)leaf_pbl) >> 32))); + leaf_pbl += 256; + } + } else { + for (i = 3; i >= 0; i--) { + j = i * 16; + root_vpbl->pbl_vbase[j] = root_vpbl->pbl_vbase[i]; + leaf_pbl = le32_to_cpu(root_vpbl->pbl_vbase[j].pa_low) + + (((u64)le32_to_cpu(root_vpbl->pbl_vbase[j].pa_high)) + << 32); + for (k = 1; k < 16; k++) { + leaf_pbl += 256; + root_vpbl->pbl_vbase[j + k].pa_low = + cpu_to_le32((u32)leaf_pbl); + root_vpbl->pbl_vbase[j + k].pa_high = + cpu_to_le32((u32)((((u64)leaf_pbl) >> 32))); + } + } + } + + return 1; +} + + +/** + * nes_reg_mr + */ +static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, + u32 stag, u64 region_length, struct nes_root_vpbl *root_vpbl, + dma_addr_t single_buffer, u16 pbl_count_4k, + u16 residual_page_count_4k, int acc, u64 *iova_start, + u16 *actual_pbl_cnt, u8 *used_4k_pbls) +{ + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + unsigned long flags; + int ret; + struct nes_adapter *nesadapter = nesdev->nesadapter; + uint pg_cnt = 0; + u16 pbl_count_256 = 0; + u16 pbl_count = 0; + u8 use_256_pbls = 0; + u8 use_4k_pbls = 0; + u16 use_two_level = (pbl_count_4k > 1) ? 1 : 0; + struct nes_root_vpbl new_root = { 0, NULL, NULL }; + u32 opcode = 0; + u16 major_code; + + /* Register the region with the adapter */ + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); + return -ENOMEM; + } + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + + if (pbl_count_4k) { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + + pg_cnt = ((pbl_count_4k - 1) * 512) + residual_page_count_4k; + pbl_count_256 = (pg_cnt + 31) / 32; + if (pg_cnt <= 32) { + if (pbl_count_256 <= nesadapter->free_256pbl) + use_256_pbls = 1; + else if (pbl_count_4k <= nesadapter->free_4kpbl) + use_4k_pbls = 1; + } else if (pg_cnt <= 2048) { + if (((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) && + (nesadapter->free_4kpbl > (nesadapter->max_4kpbl >> 1))) { + use_4k_pbls = 1; + } else if ((pbl_count_256 + 1) <= nesadapter->free_256pbl) { + use_256_pbls = 1; + use_two_level = 1; + } else if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) { + use_4k_pbls = 1; + } + } else { + if ((pbl_count_4k + 1) <= nesadapter->free_4kpbl) + use_4k_pbls = 1; + } + + if (use_256_pbls) { + pbl_count = pbl_count_256; + nesadapter->free_256pbl -= pbl_count + use_two_level; + } else if (use_4k_pbls) { + pbl_count = pbl_count_4k; + nesadapter->free_4kpbl -= pbl_count + use_two_level; + } else { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + nes_debug(NES_DBG_MR, "Out of Pbls\n"); + nes_free_cqp_request(nesdev, cqp_request); + return -ENOMEM; + } + + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } + + if (use_256_pbls && use_two_level) { + if (root_256(nesdev, root_vpbl, &new_root, pbl_count_4k) == 1) { + if (new_root.pbl_pbase != 0) + root_vpbl = &new_root; + } else { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_256pbl += pbl_count_256 + use_two_level; + use_256_pbls = 0; + + if (pbl_count_4k == 1) + use_two_level = 0; + pbl_count = pbl_count_4k; + + if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) { + nesadapter->free_4kpbl -= pbl_count + use_two_level; + use_4k_pbls = 1; + } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + + if (use_4k_pbls == 0) + return -ENOMEM; + } + } + + opcode = NES_CQP_REGISTER_STAG | NES_CQP_STAG_RIGHTS_LOCAL_READ | + NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR; + if (acc & IB_ACCESS_LOCAL_WRITE) + opcode |= NES_CQP_STAG_RIGHTS_LOCAL_WRITE; + if (acc & IB_ACCESS_REMOTE_WRITE) + opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_REM_ACC_EN; + if (acc & IB_ACCESS_REMOTE_READ) + opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_REM_ACC_EN; + if (acc & IB_ACCESS_MW_BIND) + opcode |= NES_CQP_STAG_RIGHTS_WINDOW_BIND | NES_CQP_STAG_REM_ACC_EN; + + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, *iova_start); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, region_length); + + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] = + cpu_to_le32((u32)(region_length >> 8) & 0xff000000); + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |= + cpu_to_le32(nespd->pd_id & 0x00007fff); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag); + + if (pbl_count == 0) { + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, single_buffer); + } else { + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, root_vpbl->pbl_pbase); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, pbl_count); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (pg_cnt * 8)); + + if (use_4k_pbls) + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE); + } + barrier(); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request); + + /* Wait for CQP */ + ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u," + " CQP Major:Minor codes = 0x%04X:0x%04X.\n", + stag, ret, cqp_request->major_code, cqp_request->minor_code); + major_code = cqp_request->major_code; + nes_put_cqp_request(nesdev, cqp_request); + + if ((!ret || major_code) && pbl_count != 0) { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + if (use_256_pbls) + nesadapter->free_256pbl += pbl_count + use_two_level; + else if (use_4k_pbls) + nesadapter->free_4kpbl += pbl_count + use_two_level; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } + if (new_root.pbl_pbase) + pci_free_consistent(nesdev->pcidev, 512, new_root.pbl_vbase, + new_root.pbl_pbase); + + if (!ret) + return -ETIME; + else if (major_code) + return -EIO; + + *actual_pbl_cnt = pbl_count + use_two_level; + *used_4k_pbls = use_4k_pbls; + return 0; +} + + +/** + * nes_reg_phys_mr + */ +static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, + struct ib_phys_buf *buffer_list, int num_phys_buf, int acc, + u64 * iova_start) +{ + u64 region_length; + struct nes_pd *nespd = to_nespd(ib_pd); + struct nes_vnic *nesvnic = to_nesvnic(ib_pd->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_mr *nesmr; + struct ib_mr *ibmr; + struct nes_vpbl vpbl; + struct nes_root_vpbl root_vpbl; + u32 stag; + u32 i; + unsigned long mask; + u32 stag_index = 0; + u32 next_stag_index = 0; + u32 driver_key = 0; + u32 root_pbl_index = 0; + u32 cur_pbl_index = 0; + int err = 0; + int ret = 0; + u16 pbl_count = 0; + u8 single_page = 1; + u8 stag_key = 0; + + region_length = 0; + vpbl.pbl_vbase = NULL; + root_vpbl.pbl_vbase = NULL; + root_vpbl.pbl_pbase = 0; + + get_random_bytes(&next_stag_index, sizeof(next_stag_index)); + stag_key = (u8)next_stag_index; + + driver_key = 0; + + next_stag_index >>= 8; + next_stag_index %= nesadapter->max_mr; + if (num_phys_buf > (1024*512)) { + return ERR_PTR(-E2BIG); + } + + if ((buffer_list[0].addr ^ *iova_start) & ~PAGE_MASK) + return ERR_PTR(-EINVAL); + + err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr, + &stag_index, &next_stag_index); + if (err) { + return ERR_PTR(err); + } + + nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); + if (!nesmr) { + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + return ERR_PTR(-ENOMEM); + } + + for (i = 0; i < num_phys_buf; i++) { + + if ((i & 0x01FF) == 0) { + if (root_pbl_index == 1) { + /* Allocate the root PBL */ + root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192, + &root_vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n", + root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase); + if (!root_vpbl.pbl_vbase) { + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + kfree(nesmr); + return ERR_PTR(-ENOMEM); + } + root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL); + if (!root_vpbl.leaf_vpbl) { + pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, + root_vpbl.pbl_pbase); + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + kfree(nesmr); + return ERR_PTR(-ENOMEM); + } + root_vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[0].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); + root_vpbl.leaf_vpbl[0] = vpbl; + } + /* Allocate a 4K buffer for the PBL */ + vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, + &vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n", + vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase); + if (!vpbl.pbl_vbase) { + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + ibmr = ERR_PTR(-ENOMEM); + kfree(nesmr); + goto reg_phys_err; + } + /* Fill in the root table */ + if (1 <= root_pbl_index) { + root_vpbl.pbl_vbase[root_pbl_index].pa_low = + cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[root_pbl_index].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); + root_vpbl.leaf_vpbl[root_pbl_index] = vpbl; + } + root_pbl_index++; + cur_pbl_index = 0; + } + + mask = !buffer_list[i].size; + if (i != 0) + mask |= buffer_list[i].addr; + if (i != num_phys_buf - 1) + mask |= buffer_list[i].addr + buffer_list[i].size; + + if (mask & ~PAGE_MASK) { + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n"); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_phys_err; + } + + region_length += buffer_list[i].size; + if ((i != 0) && (single_page)) { + if ((buffer_list[i-1].addr+PAGE_SIZE) != buffer_list[i].addr) + single_page = 0; + } + vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr & PAGE_MASK); + vpbl.pbl_vbase[cur_pbl_index++].pa_high = + cpu_to_le32((u32)((((u64)buffer_list[i].addr) >> 32))); + } + + stag = stag_index << 8; + stag |= driver_key; + stag += (u32)stag_key; + + nes_debug(NES_DBG_MR, "Registering STag 0x%08X, VA = 0x%016lX," + " length = 0x%016lX, index = 0x%08X\n", + stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index); + + /* Make the leaf PBL the root if only one PBL */ + if (root_pbl_index == 1) { + root_vpbl.pbl_pbase = vpbl.pbl_pbase; + } + + if (single_page) { + pbl_count = 0; + } else { + pbl_count = root_pbl_index; + } + ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl, + buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start, + &nesmr->pbls_used, &nesmr->pbl_4k); + + if (ret == 0) { + nesmr->ibmr.rkey = stag; + nesmr->ibmr.lkey = stag; + nesmr->mode = IWNES_MEMREG_TYPE_MEM; + ibmr = &nesmr->ibmr; + } else { + kfree(nesmr); + ibmr = ERR_PTR(-ENOMEM); + } + + reg_phys_err: + /* free the resources */ + if (root_pbl_index == 1) { + /* single PBL case */ + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase); + } else { + for (i=0; ipcidev, 4096, root_vpbl.leaf_vpbl[i].pbl_vbase, + root_vpbl.leaf_vpbl[i].pbl_pbase); + } + kfree(root_vpbl.leaf_vpbl); + pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, + root_vpbl.pbl_pbase); + } + + return ibmr; +} + + +/** + * nes_get_dma_mr + */ +static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct ib_phys_buf bl; + u64 kva = 0; + + nes_debug(NES_DBG_MR, "\n"); + + bl.size = (u64)0xffffffffffULL; + bl.addr = 0; + return nes_reg_phys_mr(pd, &bl, 1, acc, &kva); +} + + +/** + * nes_reg_user_mr + */ +static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int acc, struct ib_udata *udata) +{ + u64 iova_start; + __le64 *pbl; + u64 region_length; + dma_addr_t last_dma_addr = 0; + dma_addr_t first_dma_addr = 0; + struct nes_pd *nespd = to_nespd(pd); + struct nes_vnic *nesvnic = to_nesvnic(pd->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct ib_mr *ibmr = ERR_PTR(-EINVAL); + struct ib_umem_chunk *chunk; + struct nes_ucontext *nes_ucontext; + struct nes_pbl *nespbl; + struct nes_mr *nesmr; + struct ib_umem *region; + struct nes_mem_reg_req req; + struct nes_vpbl vpbl; + struct nes_root_vpbl root_vpbl; + int nmap_index, page_index; + int page_count = 0; + int err, pbl_depth = 0; + int chunk_pages; + int ret; + u32 stag; + u32 stag_index = 0; + u32 next_stag_index; + u32 driver_key; + u32 root_pbl_index = 0; + u32 cur_pbl_index = 0; + u32 skip_pages; + u16 pbl_count; + u8 single_page = 1; + u8 stag_key; + + region = ib_umem_get(pd->uobject->context, start, length, acc, 0); + if (IS_ERR(region)) { + return (struct ib_mr *)region; + } + + nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u," + " offset = %u, page size = %u.\n", + (unsigned long int)start, (unsigned long int)virt, (u32)length, + region->offset, region->page_size); + + skip_pages = ((u32)region->offset) >> 12; + + if (ib_copy_from_udata(&req, udata, sizeof(req))) { + ib_umem_release(region); + return ERR_PTR(-EFAULT); + } + nes_debug(NES_DBG_MR, "Memory Registration type = %08X.\n", req.reg_type); + + switch (req.reg_type) { + case IWNES_MEMREG_TYPE_MEM: + pbl_depth = 0; + region_length = 0; + vpbl.pbl_vbase = NULL; + root_vpbl.pbl_vbase = NULL; + root_vpbl.pbl_pbase = 0; + + get_random_bytes(&next_stag_index, sizeof(next_stag_index)); + stag_key = (u8)next_stag_index; + + driver_key = next_stag_index & 0x70000000; + + next_stag_index >>= 8; + next_stag_index %= nesadapter->max_mr; + + err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, + nesadapter->max_mr, &stag_index, &next_stag_index); + if (err) { + ib_umem_release(region); + return ERR_PTR(err); + } + + nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); + if (!nesmr) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + return ERR_PTR(-ENOMEM); + } + nesmr->region = region; + + list_for_each_entry(chunk, ®ion->chunk_list, list) { + nes_debug(NES_DBG_MR, "Chunk: nents = %u, nmap = %u .\n", + chunk->nents, chunk->nmap); + for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { + if (sg_dma_address(&chunk->page_list[nmap_index]) & ~PAGE_MASK) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n", + (unsigned int) sg_dma_address(&chunk->page_list[nmap_index])); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_user_mr_err; + } + + if (!sg_dma_len(&chunk->page_list[nmap_index])) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); + nes_debug(NES_DBG_MR, "Invalid Buffer Size\n"); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_user_mr_err; + } + + region_length += sg_dma_len(&chunk->page_list[nmap_index]); + chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12; + region_length -= skip_pages << 12; + for (page_index=skip_pages; page_index < chunk_pages; page_index++) { + skip_pages = 0; + if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length) + goto enough_pages; + if ((page_count&0x01FF) == 0) { + if (page_count >= 1024 * 512) { + ib_umem_release(region); + nes_free_resource(nesadapter, + nesadapter->allocated_mrs, stag_index); + kfree(nesmr); + ibmr = ERR_PTR(-E2BIG); + goto reg_user_mr_err; + } + if (root_pbl_index == 1) { + root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, + 8192, &root_vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n", + root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase); + if (!root_vpbl.pbl_vbase) { + ib_umem_release(region); + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); + kfree(nesmr); + ibmr = ERR_PTR(-ENOMEM); + goto reg_user_mr_err; + } + root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, + GFP_KERNEL); + if (!root_vpbl.leaf_vpbl) { + ib_umem_release(region); + pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, + root_vpbl.pbl_pbase); + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); + kfree(nesmr); + ibmr = ERR_PTR(-ENOMEM); + goto reg_user_mr_err; + } + root_vpbl.pbl_vbase[0].pa_low = + cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[0].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); + root_vpbl.leaf_vpbl[0] = vpbl; + } + vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, + &vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n", + vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase); + if (!vpbl.pbl_vbase) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + ibmr = ERR_PTR(-ENOMEM); + kfree(nesmr); + goto reg_user_mr_err; + } + if (1 <= root_pbl_index) { + root_vpbl.pbl_vbase[root_pbl_index].pa_low = + cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[root_pbl_index].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32))); + root_vpbl.leaf_vpbl[root_pbl_index] = vpbl; + } + root_pbl_index++; + cur_pbl_index = 0; + } + if (single_page) { + if (page_count != 0) { + if ((last_dma_addr+4096) != + (sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*4096))) + single_page = 0; + last_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*4096); + } else { + first_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*4096); + last_dma_addr = first_dma_addr; + } + } + + vpbl.pbl_vbase[cur_pbl_index].pa_low = + cpu_to_le32((u32)(sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*4096))); + vpbl.pbl_vbase[cur_pbl_index].pa_high = + cpu_to_le32((u32)((((u64)(sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*4096))) >> 32))); + cur_pbl_index++; + page_count++; + } + } + } + enough_pages: + nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x," + " stag_key=0x%08x\n", + stag_index, driver_key, stag_key); + stag = stag_index << 8; + stag |= driver_key; + stag += (u32)stag_key; + + iova_start = virt; + /* Make the leaf PBL the root if only one PBL */ + if (root_pbl_index == 1) { + root_vpbl.pbl_pbase = vpbl.pbl_pbase; + } + + if (single_page) { + pbl_count = 0; + } else { + pbl_count = root_pbl_index; + first_dma_addr = 0; + } + nes_debug(NES_DBG_MR, "Registering STag 0x%08X, VA = 0x%08X, length = 0x%08X," + " index = 0x%08X, region->length=0x%08llx, pbl_count = %u\n", + stag, (unsigned int)iova_start, + (unsigned int)region_length, stag_index, + (unsigned long long)region->length, pbl_count); + ret = nes_reg_mr(nesdev, nespd, stag, region->length, &root_vpbl, + first_dma_addr, pbl_count, (u16)cur_pbl_index, acc, + &iova_start, &nesmr->pbls_used, &nesmr->pbl_4k); + + nes_debug(NES_DBG_MR, "ret=%d\n", ret); + + if (ret == 0) { + nesmr->ibmr.rkey = stag; + nesmr->ibmr.lkey = stag; + nesmr->mode = IWNES_MEMREG_TYPE_MEM; + ibmr = &nesmr->ibmr; + } else { + ib_umem_release(region); + kfree(nesmr); + ibmr = ERR_PTR(-ENOMEM); + } + + reg_user_mr_err: + /* free the resources */ + if (root_pbl_index == 1) { + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + } else { + for (page_index=0; page_indexpcidev, 4096, + root_vpbl.leaf_vpbl[page_index].pbl_vbase, + root_vpbl.leaf_vpbl[page_index].pbl_pbase); + } + kfree(root_vpbl.leaf_vpbl); + pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, + root_vpbl.pbl_pbase); + } + + nes_debug(NES_DBG_MR, "Leaving, ibmr=%p", ibmr); + + return ibmr; + case IWNES_MEMREG_TYPE_QP: + case IWNES_MEMREG_TYPE_CQ: + nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL); + if (!nespbl) { + nes_debug(NES_DBG_MR, "Unable to allocate PBL\n"); + ib_umem_release(region); + return ERR_PTR(-ENOMEM); + } + nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); + if (!nesmr) { + ib_umem_release(region); + kfree(nespbl); + nes_debug(NES_DBG_MR, "Unable to allocate nesmr\n"); + return ERR_PTR(-ENOMEM); + } + nesmr->region = region; + nes_ucontext = to_nesucontext(pd->uobject->context); + pbl_depth = region->length >> 12; + pbl_depth += (region->length & (4096-1)) ? 1 : 0; + nespbl->pbl_size = pbl_depth*sizeof(u64); + if (req.reg_type == IWNES_MEMREG_TYPE_QP) { + nes_debug(NES_DBG_MR, "Attempting to allocate QP PBL memory"); + } else { + nes_debug(NES_DBG_MR, "Attempting to allocate CP PBL memory"); + } + + nes_debug(NES_DBG_MR, " %u bytes, %u entries.\n", + nespbl->pbl_size, pbl_depth); + pbl = pci_alloc_consistent(nesdev->pcidev, nespbl->pbl_size, + &nespbl->pbl_pbase); + if (!pbl) { + ib_umem_release(region); + kfree(nesmr); + kfree(nespbl); + nes_debug(NES_DBG_MR, "Unable to allocate PBL memory\n"); + return ERR_PTR(-ENOMEM); + } + + nespbl->pbl_vbase = (u64 *)pbl; + nespbl->user_base = start; + nes_debug(NES_DBG_MR, "Allocated PBL memory, %u bytes, pbl_pbase=%lx," + " pbl_vbase=%p user_base=0x%lx\n", + nespbl->pbl_size, (unsigned long) nespbl->pbl_pbase, + (void *) nespbl->pbl_vbase, nespbl->user_base); + + list_for_each_entry(chunk, ®ion->chunk_list, list) { + for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { + chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12; + chunk_pages += (sg_dma_len(&chunk->page_list[nmap_index]) & (4096-1)) ? 1 : 0; + nespbl->page = sg_page(&chunk->page_list[0]); + for (page_index=0; page_indexpage_list[nmap_index])+ + (page_index*4096))); + ((__le32 *)pbl)[1] = cpu_to_le32(((u64) + (sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*4096)))>>32); + nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl, + (unsigned long long)*pbl, + le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0])); + pbl++; + } + } + } + if (req.reg_type == IWNES_MEMREG_TYPE_QP) { + list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list); + } else { + list_add_tail(&nespbl->list, &nes_ucontext->cq_reg_mem_list); + } + nesmr->ibmr.rkey = -1; + nesmr->ibmr.lkey = -1; + nesmr->mode = req.reg_type; + return &nesmr->ibmr; + } + + ib_umem_release(region); + return ERR_PTR(-ENOSYS); +} + + +/** + * nes_dereg_mr + */ +static int nes_dereg_mr(struct ib_mr *ib_mr) +{ + struct nes_mr *nesmr = to_nesmr(ib_mr); + struct nes_vnic *nesvnic = to_nesvnic(ib_mr->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + unsigned long flags; + int ret; + u16 major_code; + u16 minor_code; + + if (nesmr->region) { + ib_umem_release(nesmr->region); + } + if (nesmr->mode != IWNES_MEMREG_TYPE_MEM) { + kfree(nesmr); + return 0; + } + + /* Deallocate the region with the adapter */ + + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); + return -ENOMEM; + } + cqp_request->waiting = 1; + cqp_wqe = &cqp_request->cqp_wqe; + + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + NES_CQP_DEALLOCATE_STAG | NES_CQP_STAG_VA_TO | + NES_CQP_STAG_DEALLOC_PBLS | NES_CQP_STAG_MR); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ib_mr->rkey); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request); + + /* Wait for CQP */ + nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X completed\n", ib_mr->rkey); + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_MR, "Deallocate STag 0x%08X completed, wait_event_timeout ret = %u," + " CQP Major:Minor codes = 0x%04X:0x%04X\n", + ib_mr->rkey, ret, cqp_request->major_code, cqp_request->minor_code); + + major_code = cqp_request->major_code; + minor_code = cqp_request->minor_code; + + nes_put_cqp_request(nesdev, cqp_request); + + if (!ret) { + nes_debug(NES_DBG_MR, "Timeout waiting to destroy STag," + " ib_mr=%p, rkey = 0x%08X\n", + ib_mr, ib_mr->rkey); + return -ETIME; + } else if (major_code) { + nes_debug(NES_DBG_MR, "Error (0x%04X:0x%04X) while attempting" + " to destroy STag, ib_mr=%p, rkey = 0x%08X\n", + major_code, minor_code, ib_mr, ib_mr->rkey); + return -EIO; + } + + if (nesmr->pbls_used != 0) { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + if (nesmr->pbl_4k) { + nesadapter->free_4kpbl += nesmr->pbls_used; + if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) + printk(KERN_ERR PFX "free 4KB PBLs(%u) has " + "exceeded the max(%u)\n", + nesadapter->free_4kpbl, + nesadapter->max_4kpbl); + } else { + nesadapter->free_256pbl += nesmr->pbls_used; + if (nesadapter->free_256pbl > nesadapter->max_256pbl) + printk(KERN_ERR PFX "free 256B PBLs(%u) has " + "exceeded the max(%u)\n", + nesadapter->free_256pbl, + nesadapter->max_256pbl); + } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + (ib_mr->rkey & 0x0fffff00) >> 8); + + kfree(nesmr); + + return 0; +} + + +/** + * show_rev + */ +static ssize_t show_rev(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct nes_ib_device *nesibdev = + container_of(dev, struct nes_ib_device, ibdev.dev); + struct nes_vnic *nesvnic = nesibdev->nesvnic; + + nes_debug(NES_DBG_INIT, "\n"); + return sprintf(buf, "%x\n", nesvnic->nesdev->nesadapter->hw_rev); +} + + +/** + * show_fw_ver + */ +static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct nes_ib_device *nesibdev = + container_of(dev, struct nes_ib_device, ibdev.dev); + struct nes_vnic *nesvnic = nesibdev->nesvnic; + + nes_debug(NES_DBG_INIT, "\n"); + return sprintf(buf, "%u.%u\n", + (nesvnic->nesdev->nesadapter->firmware_version >> 16), + (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff)); +} + + +/** + * show_hca + */ +static ssize_t show_hca(struct device *dev, struct device_attribute *attr, + char *buf) +{ + nes_debug(NES_DBG_INIT, "\n"); + return sprintf(buf, "NES020\n"); +} + + +/** + * show_board + */ +static ssize_t show_board(struct device *dev, struct device_attribute *attr, + char *buf) +{ + nes_debug(NES_DBG_INIT, "\n"); + return sprintf(buf, "%.*s\n", 32, "NES020 Board ID"); +} + + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); + +static struct device_attribute *nes_dev_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_fw_ver, + &dev_attr_hca_type, + &dev_attr_board_id +}; + + +/** + * nes_query_qp + */ +static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr) +{ + struct nes_qp *nesqp = to_nesqp(ibqp); + + nes_debug(NES_DBG_QP, "\n"); + + attr->qp_access_flags = 0; + attr->cap.max_send_wr = nesqp->hwqp.sq_size; + attr->cap.max_recv_wr = nesqp->hwqp.rq_size; + attr->cap.max_recv_sge = 1; + if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) + attr->cap.max_inline_data = 0; + else + attr->cap.max_inline_data = 64; + + init_attr->event_handler = nesqp->ibqp.event_handler; + init_attr->qp_context = nesqp->ibqp.qp_context; + init_attr->send_cq = nesqp->ibqp.send_cq; + init_attr->recv_cq = nesqp->ibqp.recv_cq; + init_attr->srq = nesqp->ibqp.srq = nesqp->ibqp.srq; + init_attr->cap = attr->cap; + + return 0; +} + + +/** + * nes_hw_modify_qp + */ +int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, + u32 next_iwarp_state, u32 termlen, u32 wait_completion) +{ + struct nes_hw_cqp_wqe *cqp_wqe; + /* struct iw_cm_id *cm_id = nesqp->cm_id; */ + /* struct iw_cm_event cm_event; */ + struct nes_cqp_request *cqp_request; + int ret; + u16 major_code; + + nes_debug(NES_DBG_MOD_QP, "QP%u, refcount=%d\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount)); + + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_MOD_QP, "Failed to get a cqp_request.\n"); + return -ENOMEM; + } + if (wait_completion) { + cqp_request->waiting = 1; + } else { + cqp_request->waiting = 0; + } + cqp_wqe = &cqp_request->cqp_wqe; + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + NES_CQP_MODIFY_QP | NES_CQP_QP_TYPE_IWARP | next_iwarp_state); + nes_debug(NES_DBG_MOD_QP, "using next_iwarp_state=%08x, wqe_words=%08x\n", + next_iwarp_state, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])); + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase); + + /* If sending a terminate message, fill in the length (in words) */ + if (((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) == NES_CQP_QP_IWARP_STATE_TERMINATE) && + !(next_iwarp_state & NES_CQP_QP_TERM_DONT_SEND_TERM_MSG)) { + termlen = ((termlen + 3) >> 2) << NES_CQP_OP_TERMLEN_SHIFT; + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_NEW_MSS_IDX, termlen); + } + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request); + + /* Wait for CQP */ + if (wait_completion) { + /* nes_debug(NES_DBG_MOD_QP, "Waiting for modify iWARP QP%u to complete.\n", + nesqp->hwqp.qp_id); */ + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_MOD_QP, "Modify iwarp QP%u completed, wait_event_timeout ret=%u, " + "CQP Major:Minor codes = 0x%04X:0x%04X.\n", + nesqp->hwqp.qp_id, ret, cqp_request->major_code, cqp_request->minor_code); + major_code = cqp_request->major_code; + if (major_code) { + nes_debug(NES_DBG_MOD_QP, "Modify iwarp QP%u failed" + "CQP Major:Minor codes = 0x%04X:0x%04X, intended next state = 0x%08X.\n", + nesqp->hwqp.qp_id, cqp_request->major_code, + cqp_request->minor_code, next_iwarp_state); + } + + nes_put_cqp_request(nesdev, cqp_request); + + if (!ret) + return -ETIME; + else if (major_code) + return -EIO; + else + return 0; + } else { + return 0; + } +} + + +/** + * nes_modify_qp + */ +int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct nes_qp *nesqp = to_nesqp(ibqp); + struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); + struct nes_device *nesdev = nesvnic->nesdev; + /* u32 cqp_head; */ + /* u32 counter; */ + u32 next_iwarp_state = 0; + int err; + unsigned long qplockflags; + int ret; + u16 original_last_aeq; + u8 issue_modify_qp = 0; + u8 dont_wait = 0; + + nes_debug(NES_DBG_MOD_QP, "QP%u: QP State=%u, cur QP State=%u," + " iwarp_state=0x%X, refcount=%d\n", + nesqp->hwqp.qp_id, attr->qp_state, nesqp->ibqp_state, + nesqp->iwarp_state, atomic_read(&nesqp->refcount)); + + spin_lock_irqsave(&nesqp->lock, qplockflags); + + nes_debug(NES_DBG_MOD_QP, "QP%u: hw_iwarp_state=0x%X, hw_tcp_state=0x%X," + " QP Access Flags=0x%X, attr_mask = 0x%0x\n", + nesqp->hwqp.qp_id, nesqp->hw_iwarp_state, + nesqp->hw_tcp_state, attr->qp_access_flags, attr_mask); + + if (attr_mask & IB_QP_STATE) { + switch (attr->qp_state) { + case IB_QPS_INIT: + nes_debug(NES_DBG_MOD_QP, "QP%u: new state = init\n", + nesqp->hwqp.qp_id); + if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_IDLE) { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + return -EINVAL; + } + next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE; + issue_modify_qp = 1; + break; + case IB_QPS_RTR: + nes_debug(NES_DBG_MOD_QP, "QP%u: new state = rtr\n", + nesqp->hwqp.qp_id); + if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_IDLE) { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + return -EINVAL; + } + next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE; + issue_modify_qp = 1; + break; + case IB_QPS_RTS: + nes_debug(NES_DBG_MOD_QP, "QP%u: new state = rts\n", + nesqp->hwqp.qp_id); + if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_RTS) { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + return -EINVAL; + } + if (nesqp->cm_id == NULL) { + nes_debug(NES_DBG_MOD_QP, "QP%u: Failing attempt to move QP to RTS without a CM_ID. \n", + nesqp->hwqp.qp_id ); + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + return -EINVAL; + } + next_iwarp_state = NES_CQP_QP_IWARP_STATE_RTS; + if (nesqp->iwarp_state != NES_CQP_QP_IWARP_STATE_RTS) + next_iwarp_state |= NES_CQP_QP_CONTEXT_VALID | + NES_CQP_QP_ARP_VALID | NES_CQP_QP_ORD_VALID; + issue_modify_qp = 1; + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_ESTABLISHED; + nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_RTS; + nesqp->hte_added = 1; + break; + case IB_QPS_SQD: + issue_modify_qp = 1; + nes_debug(NES_DBG_MOD_QP, "QP%u: new state=closing. SQ head=%u, SQ tail=%u\n", + nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail); + if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_CLOSING) { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + return 0; + } else { + if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_CLOSING) { + nes_debug(NES_DBG_MOD_QP, "QP%u: State change to closing" + " ignored due to current iWARP state\n", + nesqp->hwqp.qp_id); + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + return -EINVAL; + } + if (nesqp->hw_iwarp_state != NES_AEQE_IWARP_STATE_RTS) { + nes_debug(NES_DBG_MOD_QP, "QP%u: State change to closing" + " already done based on hw state.\n", + nesqp->hwqp.qp_id); + issue_modify_qp = 0; + } + switch (nesqp->hw_iwarp_state) { + case NES_AEQE_IWARP_STATE_CLOSING: + next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; + case NES_AEQE_IWARP_STATE_TERMINATE: + next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE; + break; + case NES_AEQE_IWARP_STATE_ERROR: + next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR; + break; + default: + next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; + nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; + break; + } + } + break; + case IB_QPS_SQE: + nes_debug(NES_DBG_MOD_QP, "QP%u: new state = terminate\n", + nesqp->hwqp.qp_id); + if (nesqp->iwarp_state>=(u32)NES_CQP_QP_IWARP_STATE_TERMINATE) { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + return -EINVAL; + } + /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */ + next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE; + nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE; + issue_modify_qp = 1; + break; + case IB_QPS_ERR: + case IB_QPS_RESET: + if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_ERROR) { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + return -EINVAL; + } + nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n", + nesqp->hwqp.qp_id); + if (nesqp->term_flags) + del_timer(&nesqp->terminate_timer); + + next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR; + /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */ + if (nesqp->hte_added) { + nes_debug(NES_DBG_MOD_QP, "set CQP_QP_DEL_HTE\n"); + next_iwarp_state |= NES_CQP_QP_DEL_HTE; + nesqp->hte_added = 0; + } + if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) && + (nesdev->iw_status) && + (nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) { + next_iwarp_state |= NES_CQP_QP_RESET; + } else { + nes_debug(NES_DBG_MOD_QP, "QP%u NOT setting NES_CQP_QP_RESET since TCP state = %u\n", + nesqp->hwqp.qp_id, nesqp->hw_tcp_state); + dont_wait = 1; + } + issue_modify_qp = 1; + nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR; + break; + default: + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + return -EINVAL; + break; + } + + nesqp->ibqp_state = attr->qp_state; + if (((nesqp->iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) == + (u32)NES_CQP_QP_IWARP_STATE_RTS) && + ((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) > + (u32)NES_CQP_QP_IWARP_STATE_RTS)) { + nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK; + nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n", + nesqp->iwarp_state); + } else { + nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK; + nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n", + nesqp->iwarp_state); + } + } + + if (attr_mask & IB_QP_ACCESS_FLAGS) { + if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE) { + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN | + NES_QPCONTEXT_MISC_RDMA_READ_EN); + issue_modify_qp = 1; + } + if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) { + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN); + issue_modify_qp = 1; + } + if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) { + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_READ_EN); + issue_modify_qp = 1; + } + if (attr->qp_access_flags & IB_ACCESS_MW_BIND) { + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WBIND_EN); + issue_modify_qp = 1; + } + + if (nesqp->user_mode) { + nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN | + NES_QPCONTEXT_MISC_RDMA_READ_EN); + issue_modify_qp = 1; + } + } + + original_last_aeq = nesqp->last_aeq; + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + + nes_debug(NES_DBG_MOD_QP, "issue_modify_qp=%u\n", issue_modify_qp); + + ret = 0; + + + if (issue_modify_qp) { + nes_debug(NES_DBG_MOD_QP, "call nes_hw_modify_qp\n"); + ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 1); + if (ret) + nes_debug(NES_DBG_MOD_QP, "nes_hw_modify_qp (next_iwarp_state = 0x%08X)" + " failed for QP%u.\n", + next_iwarp_state, nesqp->hwqp.qp_id); + + } + + if ((issue_modify_qp) && (nesqp->ibqp_state > IB_QPS_RTS)) { + nes_debug(NES_DBG_MOD_QP, "QP%u Issued ModifyQP refcount (%d)," + " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), + original_last_aeq, nesqp->last_aeq); + if ((!ret) || + ((original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) && + (ret))) { + if (dont_wait) { + if (nesqp->cm_id && nesqp->hw_tcp_state != 0) { + nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d)," + " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), + original_last_aeq, nesqp->last_aeq); + /* this one is for the cm_disconnect thread */ + spin_lock_irqsave(&nesqp->lock, qplockflags); + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_cm_disconn(nesqp); + } else { + nes_debug(NES_DBG_MOD_QP, "QP%u No fake disconnect, QP refcount=%d\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount)); + } + } else { + spin_lock_irqsave(&nesqp->lock, qplockflags); + if (nesqp->cm_id) { + /* These two are for the timer thread */ + if (atomic_inc_return(&nesqp->close_timer_started) == 1) { + nesqp->cm_id->add_ref(nesqp->cm_id); + nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d)," + " need ae to finish up, original_last_aeq = 0x%04X." + " last_aeq = 0x%04X, scheduling timer.\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), + original_last_aeq, nesqp->last_aeq); + schedule_nes_timer(nesqp->cm_node, (struct sk_buff *) nesqp, NES_TIMER_TYPE_CLOSE, 1, 0); + } + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + } else { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d)," + " need ae to finish up, original_last_aeq = 0x%04X." + " last_aeq = 0x%04X.\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), + original_last_aeq, nesqp->last_aeq); + } + } + } else { + nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up," + " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), + original_last_aeq, nesqp->last_aeq); + } + } else { + nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up," + " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), + original_last_aeq, nesqp->last_aeq); + } + + err = 0; + + nes_debug(NES_DBG_MOD_QP, "QP%u Leaving, refcount=%d\n", + nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount)); + + return err; +} + + +/** + * nes_muticast_attach + */ +static int nes_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + nes_debug(NES_DBG_INIT, "\n"); + return -ENOSYS; +} + + +/** + * nes_multicast_detach + */ +static int nes_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + nes_debug(NES_DBG_INIT, "\n"); + return -ENOSYS; +} + + +/** + * nes_process_mad + */ +static int nes_process_mad(struct ib_device *ibdev, int mad_flags, + u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + nes_debug(NES_DBG_INIT, "\n"); + return -ENOSYS; +} + +static inline void +fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, struct ib_send_wr *ib_wr, u32 uselkey) +{ + int sge_index; + int total_payload_length = 0; + for (sge_index = 0; sge_index < ib_wr->num_sge; sge_index++) { + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX+(sge_index*4), + ib_wr->sg_list[sge_index].addr); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_LENGTH0_IDX + (sge_index*4), + ib_wr->sg_list[sge_index].length); + if (uselkey) + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX + (sge_index*4), + (ib_wr->sg_list[sge_index].lkey)); + else + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX + (sge_index*4), 0); + + total_payload_length += ib_wr->sg_list[sge_index].length; + } + nes_debug(NES_DBG_IW_TX, "UC UC UC, sending total_payload_length=%u \n", + total_payload_length); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX, + total_payload_length); +} + +/** + * nes_post_send + */ +static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, + struct ib_send_wr **bad_wr) +{ + u64 u64temp; + unsigned long flags = 0; + struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_qp *nesqp = to_nesqp(ibqp); + struct nes_hw_qp_wqe *wqe; + int err = 0; + u32 qsize = nesqp->hwqp.sq_size; + u32 head; + u32 wqe_misc = 0; + u32 wqe_count = 0; + u32 counter; + + if (nesqp->ibqp_state > IB_QPS_RTS) { + err = -EINVAL; + goto out; + } + + spin_lock_irqsave(&nesqp->lock, flags); + + head = nesqp->hwqp.sq_head; + + while (ib_wr) { + /* Check for QP error */ + if (nesqp->term_flags) { + err = -EINVAL; + break; + } + + /* Check for SQ overflow */ + if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { + err = -ENOMEM; + break; + } + + wqe = &nesqp->hwqp.sq_vbase[head]; + /* nes_debug(NES_DBG_IW_TX, "processing sq wqe for QP%u at %p, head = %u.\n", + nesqp->hwqp.qp_id, wqe, head); */ + nes_fill_init_qp_wqe(wqe, nesqp, head); + u64temp = (u64)(ib_wr->wr_id); + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, + u64temp); + switch (ib_wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_INV: + if (IB_WR_SEND == ib_wr->opcode) { + if (ib_wr->send_flags & IB_SEND_SOLICITED) + wqe_misc = NES_IWARP_SQ_OP_SENDSE; + else + wqe_misc = NES_IWARP_SQ_OP_SEND; + } else { + if (ib_wr->send_flags & IB_SEND_SOLICITED) + wqe_misc = NES_IWARP_SQ_OP_SENDSEINV; + else + wqe_misc = NES_IWARP_SQ_OP_SENDINV; + + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX, + ib_wr->ex.invalidate_rkey); + } + + if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { + err = -EINVAL; + break; + } + + if (ib_wr->send_flags & IB_SEND_FENCE) + wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; + + if ((ib_wr->send_flags & IB_SEND_INLINE) && + ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && + (ib_wr->sg_list[0].length <= 64)) { + memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX], + (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX, + ib_wr->sg_list[0].length); + wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA; + } else { + fill_wqe_sg_send(wqe, ib_wr, 1); + } + + break; + case IB_WR_RDMA_WRITE: + wqe_misc = NES_IWARP_SQ_OP_RDMAW; + if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { + nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n", + ib_wr->num_sge, nesdev->nesadapter->max_sge); + err = -EINVAL; + break; + } + + if (ib_wr->send_flags & IB_SEND_FENCE) + wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; + + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, + ib_wr->wr.rdma.rkey); + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, + ib_wr->wr.rdma.remote_addr); + + if ((ib_wr->send_flags & IB_SEND_INLINE) && + ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && + (ib_wr->sg_list[0].length <= 64)) { + memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX], + (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX, + ib_wr->sg_list[0].length); + wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA; + } else { + fill_wqe_sg_send(wqe, ib_wr, 1); + } + + wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] = + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]; + break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: + /* iWARP only supports 1 sge for RDMA reads */ + if (ib_wr->num_sge > 1) { + nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n", + ib_wr->num_sge); + err = -EINVAL; + break; + } + if (ib_wr->opcode == IB_WR_RDMA_READ) { + wqe_misc = NES_IWARP_SQ_OP_RDMAR; + } else { + wqe_misc = NES_IWARP_SQ_OP_RDMAR_LOCINV; + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX, + ib_wr->ex.invalidate_rkey); + } + + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, + ib_wr->wr.rdma.remote_addr); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, + ib_wr->wr.rdma.rkey); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX, + ib_wr->sg_list->length); + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, + ib_wr->sg_list->addr); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX, + ib_wr->sg_list->lkey); + break; + case IB_WR_LOCAL_INV: + wqe_misc = NES_IWARP_SQ_OP_LOCINV; + set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX, + ib_wr->ex.invalidate_rkey); + break; + case IB_WR_FAST_REG_MR: + { + int i; + int flags = ib_wr->wr.fast_reg.access_flags; + struct nes_ib_fast_reg_page_list *pnesfrpl = + container_of(ib_wr->wr.fast_reg.page_list, + struct nes_ib_fast_reg_page_list, + ibfrpl); + u64 *src_page_list = pnesfrpl->ibfrpl.page_list; + u64 *dst_page_list = pnesfrpl->nes_wqe_pbl.kva; + + if (ib_wr->wr.fast_reg.page_list_len > + (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) { + nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n"); + err = -EINVAL; + break; + } + wqe_misc = NES_IWARP_SQ_OP_FAST_REG; + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX, + ib_wr->wr.fast_reg.iova_start); + set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, + ib_wr->wr.fast_reg.length); + set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0); + set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX, + ib_wr->wr.fast_reg.rkey); + /* Set page size: */ + if (ib_wr->wr.fast_reg.page_shift == 12) { + wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K; + } else if (ib_wr->wr.fast_reg.page_shift == 21) { + wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M; + } else { + nes_debug(NES_DBG_IW_TX, "Invalid page shift," + " ib_wr=%u, max=1\n", ib_wr->num_sge); + err = -EINVAL; + break; + } + /* Set access_flags */ + wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ; + if (flags & IB_ACCESS_LOCAL_WRITE) + wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE; + + if (flags & IB_ACCESS_REMOTE_WRITE) + wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE; + + if (flags & IB_ACCESS_REMOTE_READ) + wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ; + + if (flags & IB_ACCESS_MW_BIND) + wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND; + + /* Fill in PBL info: */ + if (ib_wr->wr.fast_reg.page_list_len > + pnesfrpl->ibfrpl.max_page_list_len) { + nes_debug(NES_DBG_IW_TX, "Invalid page list length," + " ib_wr=%p, value=%u, max=%u\n", + ib_wr, ib_wr->wr.fast_reg.page_list_len, + pnesfrpl->ibfrpl.max_page_list_len); + err = -EINVAL; + break; + } + + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX, + pnesfrpl->nes_wqe_pbl.paddr); + + set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX, + ib_wr->wr.fast_reg.page_list_len * 8); + + for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++) + dst_page_list[i] = cpu_to_le64(src_page_list[i]); + + nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %llx, " + "length: %d, rkey: %0x, pgl_paddr: %llx, " + "page_list_len: %u, wqe_misc: %x\n", + (unsigned long long) ib_wr->wr.fast_reg.iova_start, + ib_wr->wr.fast_reg.length, + ib_wr->wr.fast_reg.rkey, + (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr, + ib_wr->wr.fast_reg.page_list_len, + wqe_misc); + break; + } + default: + /* error */ + err = -EINVAL; + break; + } + + if (err) + break; + + if ((ib_wr->send_flags & IB_SEND_SIGNALED) || nesqp->sig_all) + wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL; + + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(wqe_misc); + + ib_wr = ib_wr->next; + head++; + wqe_count++; + if (head >= qsize) + head = 0; + + } + + nesqp->hwqp.sq_head = head; + barrier(); + while (wqe_count) { + counter = min(wqe_count, ((u32)255)); + wqe_count -= counter; + nes_write32(nesdev->regs + NES_WQE_ALLOC, + (counter << 24) | 0x00800000 | nesqp->hwqp.qp_id); + } + + spin_unlock_irqrestore(&nesqp->lock, flags); + +out: + if (err) + *bad_wr = ib_wr; + return err; +} + + +/** + * nes_post_recv + */ +static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, + struct ib_recv_wr **bad_wr) +{ + u64 u64temp; + unsigned long flags = 0; + struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_qp *nesqp = to_nesqp(ibqp); + struct nes_hw_qp_wqe *wqe; + int err = 0; + int sge_index; + u32 qsize = nesqp->hwqp.rq_size; + u32 head; + u32 wqe_count = 0; + u32 counter; + u32 total_payload_length; + + if (nesqp->ibqp_state > IB_QPS_RTS) { + err = -EINVAL; + goto out; + } + + spin_lock_irqsave(&nesqp->lock, flags); + + head = nesqp->hwqp.rq_head; + + while (ib_wr) { + /* Check for QP error */ + if (nesqp->term_flags) { + err = -EINVAL; + break; + } + + if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { + err = -EINVAL; + break; + } + /* Check for RQ overflow */ + if (((head + (2 * qsize) - nesqp->hwqp.rq_tail) % qsize) == (qsize - 1)) { + err = -ENOMEM; + break; + } + + nes_debug(NES_DBG_IW_RX, "ibwr sge count = %u.\n", ib_wr->num_sge); + wqe = &nesqp->hwqp.rq_vbase[head]; + + /* nes_debug(NES_DBG_IW_RX, "QP%u:processing rq wqe at %p, head = %u.\n", + nesqp->hwqp.qp_id, wqe, head); */ + nes_fill_init_qp_wqe(wqe, nesqp, head); + u64temp = (u64)(ib_wr->wr_id); + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, + u64temp); + total_payload_length = 0; + for (sge_index=0; sge_index < ib_wr->num_sge; sge_index++) { + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_FRAG0_LOW_IDX+(sge_index*4), + ib_wr->sg_list[sge_index].addr); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_LENGTH0_IDX+(sge_index*4), + ib_wr->sg_list[sge_index].length); + set_wqe_32bit_value(wqe->wqe_words,NES_IWARP_RQ_WQE_STAG0_IDX+(sge_index*4), + ib_wr->sg_list[sge_index].lkey); + + total_payload_length += ib_wr->sg_list[sge_index].length; + } + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX, + total_payload_length); + + ib_wr = ib_wr->next; + head++; + wqe_count++; + if (head >= qsize) + head = 0; + } + + nesqp->hwqp.rq_head = head; + barrier(); + while (wqe_count) { + counter = min(wqe_count, ((u32)255)); + wqe_count -= counter; + nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter<<24) | nesqp->hwqp.qp_id); + } + + spin_unlock_irqrestore(&nesqp->lock, flags); + +out: + if (err) + *bad_wr = ib_wr; + return err; +} + + +/** + * nes_poll_cq + */ +static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) +{ + u64 u64temp; + u64 wrid; + unsigned long flags = 0; + struct nes_vnic *nesvnic = to_nesvnic(ibcq->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_cq *nescq = to_nescq(ibcq); + struct nes_qp *nesqp; + struct nes_hw_cqe cqe; + u32 head; + u32 wq_tail = 0; + u32 cq_size; + u32 cqe_count = 0; + u32 wqe_index; + u32 u32temp; + u32 move_cq_head = 1; + u32 err_code; + + nes_debug(NES_DBG_CQ, "\n"); + + spin_lock_irqsave(&nescq->lock, flags); + + head = nescq->hw_cq.cq_head; + cq_size = nescq->hw_cq.cq_size; + + while (cqe_count < num_entries) { + if ((le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & + NES_CQE_VALID) == 0) + break; + + /* + * Make sure we read CQ entry contents *after* + * we've checked the valid bit. + */ + rmb(); + + cqe = nescq->hw_cq.cq_vbase[head]; + u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]); + wqe_index = u32temp & (nesdev->nesadapter->max_qp_wr - 1); + u32temp &= ~(NES_SW_CONTEXT_ALIGN-1); + /* parse CQE, get completion context from WQE (either rq or sq) */ + u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) | + ((u64)u32temp); + + if (u64temp) { + nesqp = (struct nes_qp *)(unsigned long)u64temp; + memset(entry, 0, sizeof *entry); + if (cqe.cqe_words[NES_CQE_ERROR_CODE_IDX] == 0) { + entry->status = IB_WC_SUCCESS; + } else { + err_code = le32_to_cpu(cqe.cqe_words[NES_CQE_ERROR_CODE_IDX]); + if (NES_IWARP_CQE_MAJOR_DRV == (err_code >> 16)) { + entry->status = err_code & 0x0000ffff; + + /* The rest of the cqe's will be marked as flushed */ + nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX] = + cpu_to_le32((NES_IWARP_CQE_MAJOR_FLUSH << 16) | + NES_IWARP_CQE_MINOR_FLUSH); + } else + entry->status = IB_WC_WR_FLUSH_ERR; + } + + entry->qp = &nesqp->ibqp; + entry->src_qp = nesqp->hwqp.qp_id; + + if (le32_to_cpu(cqe.cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_SQ) { + if (nesqp->skip_lsmm) { + nesqp->skip_lsmm = 0; + nesqp->hwqp.sq_tail++; + } + + /* Working on a SQ Completion*/ + wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index]. + wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) | + ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index]. + wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX]))); + entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index]. + wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]); + + switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index]. + wqe_words[NES_IWARP_SQ_WQE_MISC_IDX]) & 0x3f) { + case NES_IWARP_SQ_OP_RDMAW: + nes_debug(NES_DBG_CQ, "Operation = RDMA WRITE.\n"); + entry->opcode = IB_WC_RDMA_WRITE; + break; + case NES_IWARP_SQ_OP_RDMAR: + nes_debug(NES_DBG_CQ, "Operation = RDMA READ.\n"); + entry->opcode = IB_WC_RDMA_READ; + entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index]. + wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX]); + break; + case NES_IWARP_SQ_OP_SENDINV: + case NES_IWARP_SQ_OP_SENDSEINV: + case NES_IWARP_SQ_OP_SEND: + case NES_IWARP_SQ_OP_SENDSE: + nes_debug(NES_DBG_CQ, "Operation = Send.\n"); + entry->opcode = IB_WC_SEND; + break; + case NES_IWARP_SQ_OP_LOCINV: + entry->opcode = IB_WC_LOCAL_INV; + break; + case NES_IWARP_SQ_OP_FAST_REG: + entry->opcode = IB_WC_FAST_REG_MR; + break; + } + + nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1); + if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head)) { + move_cq_head = 0; + wq_tail = nesqp->hwqp.sq_tail; + } + } else { + /* Working on a RQ Completion*/ + entry->byte_len = le32_to_cpu(cqe.cqe_words[NES_CQE_PAYLOAD_LENGTH_IDX]); + wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) | + ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32); + entry->opcode = IB_WC_RECV; + + nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1); + if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)) { + move_cq_head = 0; + wq_tail = nesqp->hwqp.rq_tail; + } + } + + entry->wr_id = wrid; + entry++; + cqe_count++; + } + + if (move_cq_head) { + nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0; + if (++head >= cq_size) + head = 0; + nescq->polled_completions++; + + if ((nescq->polled_completions > (cq_size / 2)) || + (nescq->polled_completions == 255)) { + nes_debug(NES_DBG_CQ, "CQ%u Issuing CQE Allocate since more than half of cqes" + " are pending %u of %u.\n", + nescq->hw_cq.cq_number, nescq->polled_completions, cq_size); + nes_write32(nesdev->regs+NES_CQE_ALLOC, + nescq->hw_cq.cq_number | (nescq->polled_completions << 16)); + nescq->polled_completions = 0; + } + } else { + /* Update the wqe index and set status to flush */ + wqe_index = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]); + wqe_index = (wqe_index & (~(nesdev->nesadapter->max_qp_wr - 1))) | wq_tail; + nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] = + cpu_to_le32(wqe_index); + move_cq_head = 1; /* ready for next pass */ + } + } + + if (nescq->polled_completions) { + nes_write32(nesdev->regs+NES_CQE_ALLOC, + nescq->hw_cq.cq_number | (nescq->polled_completions << 16)); + nescq->polled_completions = 0; + } + + nescq->hw_cq.cq_head = head; + nes_debug(NES_DBG_CQ, "Reporting %u completions for CQ%u.\n", + cqe_count, nescq->hw_cq.cq_number); + + spin_unlock_irqrestore(&nescq->lock, flags); + + return cqe_count; +} + + +/** + * nes_req_notify_cq + */ +static int nes_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) + { + struct nes_vnic *nesvnic = to_nesvnic(ibcq->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_cq *nescq = to_nescq(ibcq); + u32 cq_arm; + + nes_debug(NES_DBG_CQ, "Requesting notification for CQ%u.\n", + nescq->hw_cq.cq_number); + + cq_arm = nescq->hw_cq.cq_number; + if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP) + cq_arm |= NES_CQE_ALLOC_NOTIFY_NEXT; + else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) + cq_arm |= NES_CQE_ALLOC_NOTIFY_SE; + else + return -EINVAL; + + nes_write32(nesdev->regs+NES_CQE_ALLOC, cq_arm); + nes_read32(nesdev->regs+NES_CQE_ALLOC); + + return 0; +} + + +/** + * nes_init_ofa_device + */ +struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) +{ + struct nes_ib_device *nesibdev; + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + + nesibdev = (struct nes_ib_device *)ib_alloc_device(sizeof(struct nes_ib_device)); + if (nesibdev == NULL) { + return NULL; + } + strlcpy(nesibdev->ibdev.name, "nes%d", IB_DEVICE_NAME_MAX); + nesibdev->ibdev.owner = THIS_MODULE; + + nesibdev->ibdev.node_type = RDMA_NODE_RNIC; + memset(&nesibdev->ibdev.node_guid, 0, sizeof(nesibdev->ibdev.node_guid)); + memcpy(&nesibdev->ibdev.node_guid, netdev->dev_addr, 6); + + nesibdev->ibdev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_AH) | + (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | + (1ull << IB_USER_VERBS_CMD_BIND_MW) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_MW) | + (1ull << IB_USER_VERBS_CMD_POST_RECV) | + (1ull << IB_USER_VERBS_CMD_POST_SEND); + + nesibdev->ibdev.phys_port_cnt = 1; + nesibdev->ibdev.num_comp_vectors = 1; + nesibdev->ibdev.dma_device = &nesdev->pcidev->dev; + nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev; + nesibdev->ibdev.query_device = nes_query_device; + nesibdev->ibdev.query_port = nes_query_port; + nesibdev->ibdev.query_pkey = nes_query_pkey; + nesibdev->ibdev.query_gid = nes_query_gid; + nesibdev->ibdev.alloc_ucontext = nes_alloc_ucontext; + nesibdev->ibdev.dealloc_ucontext = nes_dealloc_ucontext; + nesibdev->ibdev.mmap = nes_mmap; + nesibdev->ibdev.alloc_pd = nes_alloc_pd; + nesibdev->ibdev.dealloc_pd = nes_dealloc_pd; + nesibdev->ibdev.create_ah = nes_create_ah; + nesibdev->ibdev.destroy_ah = nes_destroy_ah; + nesibdev->ibdev.create_qp = nes_create_qp; + nesibdev->ibdev.modify_qp = nes_modify_qp; + nesibdev->ibdev.query_qp = nes_query_qp; + nesibdev->ibdev.destroy_qp = nes_destroy_qp; + nesibdev->ibdev.create_cq = nes_create_cq; + nesibdev->ibdev.destroy_cq = nes_destroy_cq; + nesibdev->ibdev.poll_cq = nes_poll_cq; + nesibdev->ibdev.get_dma_mr = nes_get_dma_mr; + nesibdev->ibdev.reg_phys_mr = nes_reg_phys_mr; + nesibdev->ibdev.reg_user_mr = nes_reg_user_mr; + nesibdev->ibdev.dereg_mr = nes_dereg_mr; + nesibdev->ibdev.alloc_mw = nes_alloc_mw; + nesibdev->ibdev.dealloc_mw = nes_dealloc_mw; + nesibdev->ibdev.bind_mw = nes_bind_mw; + + nesibdev->ibdev.alloc_fast_reg_mr = nes_alloc_fast_reg_mr; + nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list; + nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list; + + nesibdev->ibdev.attach_mcast = nes_multicast_attach; + nesibdev->ibdev.detach_mcast = nes_multicast_detach; + nesibdev->ibdev.process_mad = nes_process_mad; + + nesibdev->ibdev.req_notify_cq = nes_req_notify_cq; + nesibdev->ibdev.post_send = nes_post_send; + nesibdev->ibdev.post_recv = nes_post_recv; + + nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL); + if (nesibdev->ibdev.iwcm == NULL) { + ib_dealloc_device(&nesibdev->ibdev); + return NULL; + } + nesibdev->ibdev.iwcm->add_ref = nes_add_ref; + nesibdev->ibdev.iwcm->rem_ref = nes_rem_ref; + nesibdev->ibdev.iwcm->get_qp = nes_get_qp; + nesibdev->ibdev.iwcm->connect = nes_connect; + nesibdev->ibdev.iwcm->accept = nes_accept; + nesibdev->ibdev.iwcm->reject = nes_reject; + nesibdev->ibdev.iwcm->create_listen = nes_create_listen; + nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; + + return nesibdev; +} + + +/** + * nes_handle_delayed_event + */ +static void nes_handle_delayed_event(unsigned long data) +{ + struct nes_vnic *nesvnic = (void *) data; + + if (nesvnic->delayed_event != nesvnic->last_dispatched_event) { + struct ib_event event; + + event.device = &nesvnic->nesibdev->ibdev; + if (!event.device) + goto stop_timer; + event.event = nesvnic->delayed_event; + event.element.port_num = nesvnic->logical_port + 1; + ib_dispatch_event(&event); + } + +stop_timer: + nesvnic->event_timer.function = NULL; +} + + +void nes_port_ibevent(struct nes_vnic *nesvnic) +{ + struct nes_ib_device *nesibdev = nesvnic->nesibdev; + struct nes_device *nesdev = nesvnic->nesdev; + struct ib_event event; + event.device = &nesibdev->ibdev; + event.element.port_num = nesvnic->logical_port + 1; + event.event = nesdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + + if (!nesvnic->event_timer.function) { + ib_dispatch_event(&event); + nesvnic->last_dispatched_event = event.event; + nesvnic->event_timer.function = nes_handle_delayed_event; + nesvnic->event_timer.data = (unsigned long) nesvnic; + nesvnic->event_timer.expires = jiffies + NES_EVENT_DELAY; + add_timer(&nesvnic->event_timer); + } else { + mod_timer(&nesvnic->event_timer, jiffies + NES_EVENT_DELAY); + } + nesvnic->delayed_event = event.event; +} + + +/** + * nes_destroy_ofa_device + */ +void nes_destroy_ofa_device(struct nes_ib_device *nesibdev) +{ + if (nesibdev == NULL) + return; + + nes_unregister_ofa_device(nesibdev); + + kfree(nesibdev->ibdev.iwcm); + ib_dealloc_device(&nesibdev->ibdev); +} + + +/** + * nes_register_ofa_device + */ +int nes_register_ofa_device(struct nes_ib_device *nesibdev) +{ + struct nes_vnic *nesvnic = nesibdev->nesvnic; + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + int i, ret; + + ret = ib_register_device(&nesvnic->nesibdev->ibdev, NULL); + if (ret) { + return ret; + } + + /* Get the resources allocated to this device */ + nesibdev->max_cq = (nesadapter->max_cq-NES_FIRST_QPN) / nesadapter->port_count; + nesibdev->max_mr = nesadapter->max_mr / nesadapter->port_count; + nesibdev->max_qp = (nesadapter->max_qp-NES_FIRST_QPN) / nesadapter->port_count; + nesibdev->max_pd = nesadapter->max_pd / nesadapter->port_count; + + for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) { + ret = device_create_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]); + if (ret) { + while (i > 0) { + i--; + device_remove_file(&nesibdev->ibdev.dev, + nes_dev_attributes[i]); + } + ib_unregister_device(&nesibdev->ibdev); + return ret; + } + } + + nesvnic->of_device_registered = 1; + + return 0; +} + + +/** + * nes_unregister_ofa_device + */ +static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev) +{ + struct nes_vnic *nesvnic = nesibdev->nesvnic; + int i; + + for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) { + device_remove_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]); + } + + if (nesvnic->of_device_registered) { + ib_unregister_device(&nesibdev->ibdev); + } + + nesvnic->of_device_registered = 0; +} diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h new file mode 100644 index 00000000..0eff7c44 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef NES_VERBS_H +#define NES_VERBS_H + +struct nes_device; + +#define NES_MAX_USER_DB_REGIONS 4096 +#define NES_MAX_USER_WQ_REGIONS 4096 + +#define NES_TERM_SENT 0x01 +#define NES_TERM_RCVD 0x02 +#define NES_TERM_DONE 0x04 + +struct nes_ucontext { + struct ib_ucontext ibucontext; + struct nes_device *nesdev; + unsigned long mmap_wq_offset; + unsigned long mmap_cq_offset; /* to be removed */ + int index; /* rnic index (minor) */ + unsigned long allocated_doorbells[BITS_TO_LONGS(NES_MAX_USER_DB_REGIONS)]; + u16 mmap_db_index[NES_MAX_USER_DB_REGIONS]; + u16 first_free_db; + unsigned long allocated_wqs[BITS_TO_LONGS(NES_MAX_USER_WQ_REGIONS)]; + struct nes_qp *mmap_nesqp[NES_MAX_USER_WQ_REGIONS]; + u16 first_free_wq; + struct list_head cq_reg_mem_list; + struct list_head qp_reg_mem_list; + u32 mcrqf; + atomic_t usecnt; +}; + +struct nes_pd { + struct ib_pd ibpd; + u16 pd_id; + atomic_t sqp_count; + u16 mmap_db_index; +}; + +struct nes_mr { + union { + struct ib_mr ibmr; + struct ib_mw ibmw; + struct ib_fmr ibfmr; + }; + struct ib_umem *region; + u16 pbls_used; + u8 mode; + u8 pbl_4k; +}; + +struct nes_hw_pb { + __le32 pa_low; + __le32 pa_high; +}; + +struct nes_vpbl { + dma_addr_t pbl_pbase; + struct nes_hw_pb *pbl_vbase; +}; + +struct nes_root_vpbl { + dma_addr_t pbl_pbase; + struct nes_hw_pb *pbl_vbase; + struct nes_vpbl *leaf_vpbl; +}; + +struct nes_fmr { + struct nes_mr nesmr; + u32 leaf_pbl_cnt; + struct nes_root_vpbl root_vpbl; + struct ib_qp *ib_qp; + int access_rights; + struct ib_fmr_attr attr; +}; + +struct nes_av; + +struct nes_cq { + struct ib_cq ibcq; + struct nes_hw_cq hw_cq; + u32 polled_completions; + u32 cq_mem_size; + spinlock_t lock; + u8 virtual_cq; + u8 pad[3]; + u32 mcrqf; +}; + +struct nes_wq { + spinlock_t lock; +}; + +struct disconn_work { + struct work_struct work; + struct nes_qp *nesqp; +}; + +struct iw_cm_id; +struct ietf_mpa_frame; + +struct nes_qp { + struct ib_qp ibqp; + void *allocated_buffer; + struct iw_cm_id *cm_id; + struct nes_cq *nesscq; + struct nes_cq *nesrcq; + struct nes_pd *nespd; + void *cm_node; /* handle of the node this QP is associated with */ + void *ietf_frame; + u8 ietf_frame_size; + dma_addr_t ietf_frame_pbase; + struct ib_mr *lsmm_mr; + struct nes_hw_qp hwqp; + struct work_struct work; + enum ib_qp_state ibqp_state; + u32 iwarp_state; + u32 hte_index; + u32 last_aeq; + u32 qp_mem_size; + atomic_t refcount; + atomic_t close_timer_started; + u32 mmap_sq_db_index; + u32 mmap_rq_db_index; + spinlock_t lock; + spinlock_t pau_lock; + struct nes_qp_context *nesqp_context; + dma_addr_t nesqp_context_pbase; + void *pbl_vbase; + dma_addr_t pbl_pbase; + struct page *page; + struct timer_list terminate_timer; + enum ib_event_type terminate_eventtype; + struct sk_buff_head pau_list; + u32 pau_rcv_nxt; + u16 active_conn:1; + u16 skip_lsmm:1; + u16 user_mode:1; + u16 hte_added:1; + u16 flush_issued:1; + u16 destroyed:1; + u16 sig_all:1; + u16 pau_mode:1; + u16 rsvd:8; + u16 private_data_len; + u16 term_sq_flush_code; + u16 term_rq_flush_code; + u8 hw_iwarp_state; + u8 hw_tcp_state; + u8 term_flags; + u8 sq_kmapped; + u8 pau_busy; + u8 pau_pending; + u8 pau_state; +}; +#endif /* NES_VERBS_H */ diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig new file mode 100644 index 00000000..8349f9c5 --- /dev/null +++ b/drivers/infiniband/hw/qib/Kconfig @@ -0,0 +1,7 @@ +config INFINIBAND_QIB + tristate "QLogic PCIe HCA support" + depends on 64BIT + ---help--- + This is a low-level driver for QLogic PCIe QLE InfiniBand host + channel adapters. This driver does not support the QLogic + HyperTransport card (model QHT7140). diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile new file mode 100644 index 00000000..f12d7bb8 --- /dev/null +++ b/drivers/infiniband/hw/qib/Makefile @@ -0,0 +1,15 @@ +obj-$(CONFIG_INFINIBAND_QIB) += ib_qib.o + +ib_qib-y := qib_cq.o qib_diag.o qib_dma.o qib_driver.o qib_eeprom.o \ + qib_file_ops.o qib_fs.o qib_init.o qib_intr.o qib_keys.o \ + qib_mad.o qib_mmap.o qib_mr.o qib_pcie.o qib_pio_copy.o \ + qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \ + qib_sysfs.o qib_twsi.o qib_tx.o qib_uc.o qib_ud.o \ + qib_user_pages.o qib_user_sdma.o qib_verbs_mcast.o qib_iba7220.o \ + qib_sd7220.o qib_iba7322.o qib_verbs.o + +# 6120 has no fallback if no MSI interrupts, others can do INTx +ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o + +ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o +ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h new file mode 100644 index 00000000..6b811e3e --- /dev/null +++ b/drivers/infiniband/hw/qib/qib.h @@ -0,0 +1,1466 @@ +#ifndef _QIB_KERNEL_H +#define _QIB_KERNEL_H +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This header file is the base header file for qlogic_ib kernel code + * qib_user.h serves a similar purpose for user code. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "qib_common.h" +#include "qib_verbs.h" + +/* only s/w major version of QLogic_IB we can handle */ +#define QIB_CHIP_VERS_MAJ 2U + +/* don't care about this except printing */ +#define QIB_CHIP_VERS_MIN 0U + +/* The Organization Unique Identifier (Mfg code), and its position in GUID */ +#define QIB_OUI 0x001175 +#define QIB_OUI_LSB 40 + +/* + * per driver stats, either not device nor port-specific, or + * summed over all of the devices and ports. + * They are described by name via ipathfs filesystem, so layout + * and number of elements can change without breaking compatibility. + * If members are added or deleted qib_statnames[] in qib_fs.c must + * change to match. + */ +struct qlogic_ib_stats { + __u64 sps_ints; /* number of interrupts handled */ + __u64 sps_errints; /* number of error interrupts */ + __u64 sps_txerrs; /* tx-related packet errors */ + __u64 sps_rcverrs; /* non-crc rcv packet errors */ + __u64 sps_hwerrs; /* hardware errors reported (parity, etc.) */ + __u64 sps_nopiobufs; /* no pio bufs avail from kernel */ + __u64 sps_ctxts; /* number of contexts currently open */ + __u64 sps_lenerrs; /* number of kernel packets where RHF != LRH len */ + __u64 sps_buffull; + __u64 sps_hdrfull; +}; + +extern struct qlogic_ib_stats qib_stats; +extern struct pci_error_handlers qib_pci_err_handler; +extern struct pci_driver qib_driver; + +#define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ +/* + * First-cut critierion for "device is active" is + * two thousand dwords combined Tx, Rx traffic per + * 5-second interval. SMA packets are 64 dwords, + * and occur "a few per second", presumably each way. + */ +#define QIB_TRAFFIC_ACTIVE_THRESHOLD (2000) + +/* + * Struct used to indicate which errors are logged in each of the + * error-counters that are logged to EEPROM. A counter is incremented + * _once_ (saturating at 255) for each event with any bits set in + * the error or hwerror register masks below. + */ +#define QIB_EEP_LOG_CNT (4) +struct qib_eep_log_mask { + u64 errs_to_log; + u64 hwerrs_to_log; +}; + +/* + * Below contains all data related to a single context (formerly called port). + */ +struct qib_ctxtdata { + void **rcvegrbuf; + dma_addr_t *rcvegrbuf_phys; + /* rcvhdrq base, needs mmap before useful */ + void *rcvhdrq; + /* kernel virtual address where hdrqtail is updated */ + void *rcvhdrtail_kvaddr; + /* + * temp buffer for expected send setup, allocated at open, instead + * of each setup call + */ + void *tid_pg_list; + /* + * Shared page for kernel to signal user processes that send buffers + * need disarming. The process should call QIB_CMD_DISARM_BUFS + * or QIB_CMD_ACK_EVENT with IPATH_EVENT_DISARM_BUFS set. + */ + unsigned long *user_event_mask; + /* when waiting for rcv or pioavail */ + wait_queue_head_t wait; + /* + * rcvegr bufs base, physical, must fit + * in 44 bits so 32 bit programs mmap64 44 bit works) + */ + dma_addr_t rcvegr_phys; + /* mmap of hdrq, must fit in 44 bits */ + dma_addr_t rcvhdrq_phys; + dma_addr_t rcvhdrqtailaddr_phys; + + /* + * number of opens (including slave sub-contexts) on this instance + * (ignoring forks, dup, etc. for now) + */ + int cnt; + /* + * how much space to leave at start of eager TID entries for + * protocol use, on each TID + */ + /* instead of calculating it */ + unsigned ctxt; + /* non-zero if ctxt is being shared. */ + u16 subctxt_cnt; + /* non-zero if ctxt is being shared. */ + u16 subctxt_id; + /* number of eager TID entries. */ + u16 rcvegrcnt; + /* index of first eager TID entry. */ + u16 rcvegr_tid_base; + /* number of pio bufs for this ctxt (all procs, if shared) */ + u32 piocnt; + /* first pio buffer for this ctxt */ + u32 pio_base; + /* chip offset of PIO buffers for this ctxt */ + u32 piobufs; + /* how many alloc_pages() chunks in rcvegrbuf_pages */ + u32 rcvegrbuf_chunks; + /* how many egrbufs per chunk */ + u16 rcvegrbufs_perchunk; + /* ilog2 of above */ + u16 rcvegrbufs_perchunk_shift; + /* order for rcvegrbuf_pages */ + size_t rcvegrbuf_size; + /* rcvhdrq size (for freeing) */ + size_t rcvhdrq_size; + /* per-context flags for fileops/intr communication */ + unsigned long flag; + /* next expected TID to check when looking for free */ + u32 tidcursor; + /* WAIT_RCV that timed out, no interrupt */ + u32 rcvwait_to; + /* WAIT_PIO that timed out, no interrupt */ + u32 piowait_to; + /* WAIT_RCV already happened, no wait */ + u32 rcvnowait; + /* WAIT_PIO already happened, no wait */ + u32 pionowait; + /* total number of polled urgent packets */ + u32 urgent; + /* saved total number of polled urgent packets for poll edge trigger */ + u32 urgent_poll; + /* pid of process using this ctxt */ + pid_t pid; + pid_t subpid[QLOGIC_IB_MAX_SUBCTXT]; + /* same size as task_struct .comm[], command that opened context */ + char comm[16]; + /* pkeys set by this use of this ctxt */ + u16 pkeys[4]; + /* so file ops can get at unit */ + struct qib_devdata *dd; + /* so funcs that need physical port can get it easily */ + struct qib_pportdata *ppd; + /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ + void *subctxt_uregbase; + /* An array of pages for the eager receive buffers * N */ + void *subctxt_rcvegrbuf; + /* An array of pages for the eager header queue entries * N */ + void *subctxt_rcvhdr_base; + /* The version of the library which opened this ctxt */ + u32 userversion; + /* Bitmask of active slaves */ + u32 active_slaves; + /* Type of packets or conditions we want to poll for */ + u16 poll_type; + /* receive packet sequence counter */ + u8 seq_cnt; + u8 redirect_seq_cnt; + /* ctxt rcvhdrq head offset */ + u32 head; + u32 pkt_count; + /* lookaside fields */ + struct qib_qp *lookaside_qp; + u32 lookaside_qpn; + /* QPs waiting for context processing */ + struct list_head qp_wait_list; +}; + +struct qib_sge_state; + +struct qib_sdma_txreq { + int flags; + int sg_count; + dma_addr_t addr; + void (*callback)(struct qib_sdma_txreq *, int); + u16 start_idx; /* sdma private */ + u16 next_descq_idx; /* sdma private */ + struct list_head list; /* sdma private */ +}; + +struct qib_sdma_desc { + __le64 qw[2]; +}; + +struct qib_verbs_txreq { + struct qib_sdma_txreq txreq; + struct qib_qp *qp; + struct qib_swqe *wqe; + u32 dwords; + u16 hdr_dwords; + u16 hdr_inx; + struct qib_pio_header *align_buf; + struct qib_mregion *mr; + struct qib_sge_state *ss; +}; + +#define QIB_SDMA_TXREQ_F_USELARGEBUF 0x1 +#define QIB_SDMA_TXREQ_F_HEADTOHOST 0x2 +#define QIB_SDMA_TXREQ_F_INTREQ 0x4 +#define QIB_SDMA_TXREQ_F_FREEBUF 0x8 +#define QIB_SDMA_TXREQ_F_FREEDESC 0x10 + +#define QIB_SDMA_TXREQ_S_OK 0 +#define QIB_SDMA_TXREQ_S_SENDERROR 1 +#define QIB_SDMA_TXREQ_S_ABORTED 2 +#define QIB_SDMA_TXREQ_S_SHUTDOWN 3 + +/* + * Get/Set IB link-level config parameters for f_get/set_ib_cfg() + * Mostly for MADs that set or query link parameters, also ipath + * config interfaces + */ +#define QIB_IB_CFG_LIDLMC 0 /* LID (LS16b) and Mask (MS16b) */ +#define QIB_IB_CFG_LWID_ENB 2 /* allowed Link-width */ +#define QIB_IB_CFG_LWID 3 /* currently active Link-width */ +#define QIB_IB_CFG_SPD_ENB 4 /* allowed Link speeds */ +#define QIB_IB_CFG_SPD 5 /* current Link spd */ +#define QIB_IB_CFG_RXPOL_ENB 6 /* Auto-RX-polarity enable */ +#define QIB_IB_CFG_LREV_ENB 7 /* Auto-Lane-reversal enable */ +#define QIB_IB_CFG_LINKLATENCY 8 /* Link Latency (IB1.2 only) */ +#define QIB_IB_CFG_HRTBT 9 /* IB heartbeat off/enable/auto; DDR/QDR only */ +#define QIB_IB_CFG_OP_VLS 10 /* operational VLs */ +#define QIB_IB_CFG_VL_HIGH_CAP 11 /* num of VL high priority weights */ +#define QIB_IB_CFG_VL_LOW_CAP 12 /* num of VL low priority weights */ +#define QIB_IB_CFG_OVERRUN_THRESH 13 /* IB overrun threshold */ +#define QIB_IB_CFG_PHYERR_THRESH 14 /* IB PHY error threshold */ +#define QIB_IB_CFG_LINKDEFAULT 15 /* IB link default (sleep/poll) */ +#define QIB_IB_CFG_PKEYS 16 /* update partition keys */ +#define QIB_IB_CFG_MTU 17 /* update MTU in IBC */ +#define QIB_IB_CFG_LSTATE 18 /* update linkcmd and linkinitcmd in IBC */ +#define QIB_IB_CFG_VL_HIGH_LIMIT 19 +#define QIB_IB_CFG_PMA_TICKS 20 /* PMA sample tick resolution */ +#define QIB_IB_CFG_PORT 21 /* switch port we are connected to */ + +/* + * for CFG_LSTATE: LINKCMD in upper 16 bits, LINKINITCMD in lower 16 + * IB_LINKINITCMD_POLL and SLEEP are also used as set/get values for + * QIB_IB_CFG_LINKDEFAULT cmd + */ +#define IB_LINKCMD_DOWN (0 << 16) +#define IB_LINKCMD_ARMED (1 << 16) +#define IB_LINKCMD_ACTIVE (2 << 16) +#define IB_LINKINITCMD_NOP 0 +#define IB_LINKINITCMD_POLL 1 +#define IB_LINKINITCMD_SLEEP 2 +#define IB_LINKINITCMD_DISABLE 3 + +/* + * valid states passed to qib_set_linkstate() user call + */ +#define QIB_IB_LINKDOWN 0 +#define QIB_IB_LINKARM 1 +#define QIB_IB_LINKACTIVE 2 +#define QIB_IB_LINKDOWN_ONLY 3 +#define QIB_IB_LINKDOWN_SLEEP 4 +#define QIB_IB_LINKDOWN_DISABLE 5 + +/* + * These 7 values (SDR, DDR, and QDR may be ORed for auto-speed + * negotiation) are used for the 3rd argument to path_f_set_ib_cfg + * with cmd QIB_IB_CFG_SPD_ENB, by direct calls or via sysfs. They + * are also the the possible values for qib_link_speed_enabled and active + * The values were chosen to match values used within the IB spec. + */ +#define QIB_IB_SDR 1 +#define QIB_IB_DDR 2 +#define QIB_IB_QDR 4 + +#define QIB_DEFAULT_MTU 4096 + +/* max number of IB ports supported per HCA */ +#define QIB_MAX_IB_PORTS 2 + +/* + * Possible IB config parameters for f_get/set_ib_table() + */ +#define QIB_IB_TBL_VL_HIGH_ARB 1 /* Get/set VL high priority weights */ +#define QIB_IB_TBL_VL_LOW_ARB 2 /* Get/set VL low priority weights */ + +/* + * Possible "operations" for f_rcvctrl(ppd, op, ctxt) + * these are bits so they can be combined, e.g. + * QIB_RCVCTRL_INTRAVAIL_ENB | QIB_RCVCTRL_CTXT_ENB + */ +#define QIB_RCVCTRL_TAILUPD_ENB 0x01 +#define QIB_RCVCTRL_TAILUPD_DIS 0x02 +#define QIB_RCVCTRL_CTXT_ENB 0x04 +#define QIB_RCVCTRL_CTXT_DIS 0x08 +#define QIB_RCVCTRL_INTRAVAIL_ENB 0x10 +#define QIB_RCVCTRL_INTRAVAIL_DIS 0x20 +#define QIB_RCVCTRL_PKEY_ENB 0x40 /* Note, default is enabled */ +#define QIB_RCVCTRL_PKEY_DIS 0x80 +#define QIB_RCVCTRL_BP_ENB 0x0100 +#define QIB_RCVCTRL_BP_DIS 0x0200 +#define QIB_RCVCTRL_TIDFLOW_ENB 0x0400 +#define QIB_RCVCTRL_TIDFLOW_DIS 0x0800 + +/* + * Possible "operations" for f_sendctrl(ppd, op, var) + * these are bits so they can be combined, e.g. + * QIB_SENDCTRL_BUFAVAIL_ENB | QIB_SENDCTRL_ENB + * Some operations (e.g. DISARM, ABORT) are known to + * be "one-shot", so do not modify shadow. + */ +#define QIB_SENDCTRL_DISARM (0x1000) +#define QIB_SENDCTRL_DISARM_BUF(bufn) ((bufn) | QIB_SENDCTRL_DISARM) + /* available (0x2000) */ +#define QIB_SENDCTRL_AVAIL_DIS (0x4000) +#define QIB_SENDCTRL_AVAIL_ENB (0x8000) +#define QIB_SENDCTRL_AVAIL_BLIP (0x10000) +#define QIB_SENDCTRL_SEND_DIS (0x20000) +#define QIB_SENDCTRL_SEND_ENB (0x40000) +#define QIB_SENDCTRL_FLUSH (0x80000) +#define QIB_SENDCTRL_CLEAR (0x100000) +#define QIB_SENDCTRL_DISARM_ALL (0x200000) + +/* + * These are the generic indices for requesting per-port + * counter values via the f_portcntr function. They + * are always returned as 64 bit values, although most + * are 32 bit counters. + */ +/* send-related counters */ +#define QIBPORTCNTR_PKTSEND 0U +#define QIBPORTCNTR_WORDSEND 1U +#define QIBPORTCNTR_PSXMITDATA 2U +#define QIBPORTCNTR_PSXMITPKTS 3U +#define QIBPORTCNTR_PSXMITWAIT 4U +#define QIBPORTCNTR_SENDSTALL 5U +/* receive-related counters */ +#define QIBPORTCNTR_PKTRCV 6U +#define QIBPORTCNTR_PSRCVDATA 7U +#define QIBPORTCNTR_PSRCVPKTS 8U +#define QIBPORTCNTR_RCVEBP 9U +#define QIBPORTCNTR_RCVOVFL 10U +#define QIBPORTCNTR_WORDRCV 11U +/* IB link related error counters */ +#define QIBPORTCNTR_RXLOCALPHYERR 12U +#define QIBPORTCNTR_RXVLERR 13U +#define QIBPORTCNTR_ERRICRC 14U +#define QIBPORTCNTR_ERRVCRC 15U +#define QIBPORTCNTR_ERRLPCRC 16U +#define QIBPORTCNTR_BADFORMAT 17U +#define QIBPORTCNTR_ERR_RLEN 18U +#define QIBPORTCNTR_IBSYMBOLERR 19U +#define QIBPORTCNTR_INVALIDRLEN 20U +#define QIBPORTCNTR_UNSUPVL 21U +#define QIBPORTCNTR_EXCESSBUFOVFL 22U +#define QIBPORTCNTR_ERRLINK 23U +#define QIBPORTCNTR_IBLINKDOWN 24U +#define QIBPORTCNTR_IBLINKERRRECOV 25U +#define QIBPORTCNTR_LLI 26U +/* other error counters */ +#define QIBPORTCNTR_RXDROPPKT 27U +#define QIBPORTCNTR_VL15PKTDROP 28U +#define QIBPORTCNTR_ERRPKEY 29U +#define QIBPORTCNTR_KHDROVFL 30U +/* sampling counters (these are actually control registers) */ +#define QIBPORTCNTR_PSINTERVAL 31U +#define QIBPORTCNTR_PSSTART 32U +#define QIBPORTCNTR_PSSTAT 33U + +/* how often we check for packet activity for "power on hours (in seconds) */ +#define ACTIVITY_TIMER 5 + +#define MAX_NAME_SIZE 64 +struct qib_msix_entry { + struct msix_entry msix; + void *arg; + char name[MAX_NAME_SIZE]; + cpumask_var_t mask; +}; + +/* Below is an opaque struct. Each chip (device) can maintain + * private data needed for its operation, but not germane to the + * rest of the driver. For convenience, we define another that + * is chip-specific, per-port + */ +struct qib_chip_specific; +struct qib_chipport_specific; + +enum qib_sdma_states { + qib_sdma_state_s00_hw_down, + qib_sdma_state_s10_hw_start_up_wait, + qib_sdma_state_s20_idle, + qib_sdma_state_s30_sw_clean_up_wait, + qib_sdma_state_s40_hw_clean_up_wait, + qib_sdma_state_s50_hw_halt_wait, + qib_sdma_state_s99_running, +}; + +enum qib_sdma_events { + qib_sdma_event_e00_go_hw_down, + qib_sdma_event_e10_go_hw_start, + qib_sdma_event_e20_hw_started, + qib_sdma_event_e30_go_running, + qib_sdma_event_e40_sw_cleaned, + qib_sdma_event_e50_hw_cleaned, + qib_sdma_event_e60_hw_halted, + qib_sdma_event_e70_go_idle, + qib_sdma_event_e7220_err_halted, + qib_sdma_event_e7322_err_halted, + qib_sdma_event_e90_timer_tick, +}; + +extern char *qib_sdma_state_names[]; +extern char *qib_sdma_event_names[]; + +struct sdma_set_state_action { + unsigned op_enable:1; + unsigned op_intenable:1; + unsigned op_halt:1; + unsigned op_drain:1; + unsigned go_s99_running_tofalse:1; + unsigned go_s99_running_totrue:1; +}; + +struct qib_sdma_state { + struct kref kref; + struct completion comp; + enum qib_sdma_states current_state; + struct sdma_set_state_action *set_state_action; + unsigned current_op; + unsigned go_s99_running; + unsigned first_sendbuf; + unsigned last_sendbuf; /* really last +1 */ + /* debugging/devel */ + enum qib_sdma_states previous_state; + unsigned previous_op; + enum qib_sdma_events last_event; +}; + +struct xmit_wait { + struct timer_list timer; + u64 counter; + u8 flags; + struct cache { + u64 psxmitdata; + u64 psrcvdata; + u64 psxmitpkts; + u64 psrcvpkts; + u64 psxmitwait; + } counter_cache; +}; + +/* + * The structure below encapsulates data relevant to a physical IB Port. + * Current chips support only one such port, but the separation + * clarifies things a bit. Note that to conform to IB conventions, + * port-numbers are one-based. The first or only port is port1. + */ +struct qib_pportdata { + struct qib_ibport ibport_data; + + struct qib_devdata *dd; + struct qib_chippport_specific *cpspec; /* chip-specific per-port */ + struct kobject pport_kobj; + struct kobject sl2vl_kobj; + struct kobject diagc_kobj; + + /* GUID for this interface, in network order */ + __be64 guid; + + /* QIB_POLL, etc. link-state specific flags, per port */ + u32 lflags; + /* qib_lflags driver is waiting for */ + u32 state_wanted; + spinlock_t lflags_lock; + /* number of (port-specific) interrupts for this port -- saturates... */ + u32 int_counter; + + /* ref count for each pkey */ + atomic_t pkeyrefs[4]; + + /* + * this address is mapped readonly into user processes so they can + * get status cheaply, whenever they want. One qword of status per port + */ + u64 *statusp; + + /* SendDMA related entries */ + spinlock_t sdma_lock; + struct qib_sdma_state sdma_state; + unsigned long sdma_buf_jiffies; + struct qib_sdma_desc *sdma_descq; + u64 sdma_descq_added; + u64 sdma_descq_removed; + u16 sdma_descq_cnt; + u16 sdma_descq_tail; + u16 sdma_descq_head; + u16 sdma_next_intr; + u16 sdma_reset_wait; + u8 sdma_generation; + struct tasklet_struct sdma_sw_clean_up_task; + struct list_head sdma_activelist; + + dma_addr_t sdma_descq_phys; + volatile __le64 *sdma_head_dma; /* DMA'ed by chip */ + dma_addr_t sdma_head_phys; + + wait_queue_head_t state_wait; /* for state_wanted */ + + /* HoL blocking for SMP replies */ + unsigned hol_state; + struct timer_list hol_timer; + + /* + * Shadow copies of registers; size indicates read access size. + * Most of them are readonly, but some are write-only register, + * where we manipulate the bits in the shadow copy, and then write + * the shadow copy to qlogic_ib. + * + * We deliberately make most of these 32 bits, since they have + * restricted range. For any that we read, we won't to generate 32 + * bit accesses, since Opteron will generate 2 separate 32 bit HT + * transactions for a 64 bit read, and we want to avoid unnecessary + * bus transactions. + */ + + /* This is the 64 bit group */ + /* last ibcstatus. opaque outside chip-specific code */ + u64 lastibcstat; + + /* these are the "32 bit" regs */ + + /* + * the following two are 32-bit bitmasks, but {test,clear,set}_bit + * all expect bit fields to be "unsigned long" + */ + unsigned long p_rcvctrl; /* shadow per-port rcvctrl */ + unsigned long p_sendctrl; /* shadow per-port sendctrl */ + + u32 ibmtu; /* The MTU programmed for this unit */ + /* + * Current max size IB packet (in bytes) including IB headers, that + * we can send. Changes when ibmtu changes. + */ + u32 ibmaxlen; + /* + * ibmaxlen at init time, limited by chip and by receive buffer + * size. Not changed after init. + */ + u32 init_ibmaxlen; + /* LID programmed for this instance */ + u16 lid; + /* list of pkeys programmed; 0 if not set */ + u16 pkeys[4]; + /* LID mask control */ + u8 lmc; + u8 link_width_supported; + u8 link_speed_supported; + u8 link_width_enabled; + u8 link_speed_enabled; + u8 link_width_active; + u8 link_speed_active; + u8 vls_supported; + u8 vls_operational; + /* Rx Polarity inversion (compensate for ~tx on partner) */ + u8 rx_pol_inv; + + u8 hw_pidx; /* physical port index */ + u8 port; /* IB port number and index into dd->pports - 1 */ + + u8 delay_mult; + + /* used to override LED behavior */ + u8 led_override; /* Substituted for normal value, if non-zero */ + u16 led_override_timeoff; /* delta to next timer event */ + u8 led_override_vals[2]; /* Alternates per blink-frame */ + u8 led_override_phase; /* Just counts, LSB picks from vals[] */ + atomic_t led_override_timer_active; + /* Used to flash LEDs in override mode */ + struct timer_list led_override_timer; + struct xmit_wait cong_stats; + struct timer_list symerr_clear_timer; +}; + +/* Observers. Not to be taken lightly, possibly not to ship. */ +/* + * If a diag read or write is to (bottom <= offset <= top), + * the "hoook" is called, allowing, e.g. shadows to be + * updated in sync with the driver. struct diag_observer + * is the "visible" part. + */ +struct diag_observer; + +typedef int (*diag_hook) (struct qib_devdata *dd, + const struct diag_observer *op, + u32 offs, u64 *data, u64 mask, int only_32); + +struct diag_observer { + diag_hook hook; + u32 bottom; + u32 top; +}; + +extern int qib_register_observer(struct qib_devdata *dd, + const struct diag_observer *op); + +/* Only declared here, not defined. Private to diags */ +struct diag_observer_list_elt; + +/* device data struct now contains only "general per-device" info. + * fields related to a physical IB port are in a qib_pportdata struct, + * described above) while fields only used by a particular chip-type are in + * a qib_chipdata struct, whose contents are opaque to this file. + */ +struct qib_devdata { + struct qib_ibdev verbs_dev; /* must be first */ + struct list_head list; + /* pointers to related structs for this device */ + /* pci access data structure */ + struct pci_dev *pcidev; + struct cdev *user_cdev; + struct cdev *diag_cdev; + struct device *user_device; + struct device *diag_device; + + /* mem-mapped pointer to base of chip regs */ + u64 __iomem *kregbase; + /* end of mem-mapped chip space excluding sendbuf and user regs */ + u64 __iomem *kregend; + /* physical address of chip for io_remap, etc. */ + resource_size_t physaddr; + /* qib_cfgctxts pointers */ + struct qib_ctxtdata **rcd; /* Receive Context Data */ + + /* qib_pportdata, points to array of (physical) port-specific + * data structs, indexed by pidx (0..n-1) + */ + struct qib_pportdata *pport; + struct qib_chip_specific *cspec; /* chip-specific */ + + /* kvirt address of 1st 2k pio buffer */ + void __iomem *pio2kbase; + /* kvirt address of 1st 4k pio buffer */ + void __iomem *pio4kbase; + /* mem-mapped pointer to base of PIO buffers (if using WC PAT) */ + void __iomem *piobase; + /* mem-mapped pointer to base of user chip regs (if using WC PAT) */ + u64 __iomem *userbase; + void __iomem *piovl15base; /* base of VL15 buffers, if not WC */ + /* + * points to area where PIOavail registers will be DMA'ed. + * Has to be on a page of it's own, because the page will be + * mapped into user program space. This copy is *ONLY* ever + * written by DMA, not by the driver! Need a copy per device + * when we get to multiple devices + */ + volatile __le64 *pioavailregs_dma; /* DMA'ed by chip */ + /* physical address where updates occur */ + dma_addr_t pioavailregs_phys; + + /* device-specific implementations of functions needed by + * common code. Contrary to previous consensus, we can't + * really just point to a device-specific table, because we + * may need to "bend", e.g. *_f_put_tid + */ + /* fallback to alternate interrupt type if possible */ + int (*f_intr_fallback)(struct qib_devdata *); + /* hard reset chip */ + int (*f_reset)(struct qib_devdata *); + void (*f_quiet_serdes)(struct qib_pportdata *); + int (*f_bringup_serdes)(struct qib_pportdata *); + int (*f_early_init)(struct qib_devdata *); + void (*f_clear_tids)(struct qib_devdata *, struct qib_ctxtdata *); + void (*f_put_tid)(struct qib_devdata *, u64 __iomem*, + u32, unsigned long); + void (*f_cleanup)(struct qib_devdata *); + void (*f_setextled)(struct qib_pportdata *, u32); + /* fill out chip-specific fields */ + int (*f_get_base_info)(struct qib_ctxtdata *, struct qib_base_info *); + /* free irq */ + void (*f_free_irq)(struct qib_devdata *); + struct qib_message_header *(*f_get_msgheader) + (struct qib_devdata *, __le32 *); + void (*f_config_ctxts)(struct qib_devdata *); + int (*f_get_ib_cfg)(struct qib_pportdata *, int); + int (*f_set_ib_cfg)(struct qib_pportdata *, int, u32); + int (*f_set_ib_loopback)(struct qib_pportdata *, const char *); + int (*f_get_ib_table)(struct qib_pportdata *, int, void *); + int (*f_set_ib_table)(struct qib_pportdata *, int, void *); + u32 (*f_iblink_state)(u64); + u8 (*f_ibphys_portstate)(u64); + void (*f_xgxs_reset)(struct qib_pportdata *); + /* per chip actions needed for IB Link up/down changes */ + int (*f_ib_updown)(struct qib_pportdata *, int, u64); + u32 __iomem *(*f_getsendbuf)(struct qib_pportdata *, u64, u32 *); + /* Read/modify/write of GPIO pins (potentially chip-specific */ + int (*f_gpio_mod)(struct qib_devdata *dd, u32 out, u32 dir, + u32 mask); + /* Enable writes to config EEPROM (if supported) */ + int (*f_eeprom_wen)(struct qib_devdata *dd, int wen); + /* + * modify rcvctrl shadow[s] and write to appropriate chip-regs. + * see above QIB_RCVCTRL_xxx_ENB/DIS for operations. + * (ctxt == -1) means "all contexts", only meaningful for + * clearing. Could remove if chip_spec shutdown properly done. + */ + void (*f_rcvctrl)(struct qib_pportdata *, unsigned int op, + int ctxt); + /* Read/modify/write sendctrl appropriately for op and port. */ + void (*f_sendctrl)(struct qib_pportdata *, u32 op); + void (*f_set_intr_state)(struct qib_devdata *, u32); + void (*f_set_armlaunch)(struct qib_devdata *, u32); + void (*f_wantpiobuf_intr)(struct qib_devdata *, u32); + int (*f_late_initreg)(struct qib_devdata *); + int (*f_init_sdma_regs)(struct qib_pportdata *); + u16 (*f_sdma_gethead)(struct qib_pportdata *); + int (*f_sdma_busy)(struct qib_pportdata *); + void (*f_sdma_update_tail)(struct qib_pportdata *, u16); + void (*f_sdma_set_desc_cnt)(struct qib_pportdata *, unsigned); + void (*f_sdma_sendctrl)(struct qib_pportdata *, unsigned); + void (*f_sdma_hw_clean_up)(struct qib_pportdata *); + void (*f_sdma_hw_start_up)(struct qib_pportdata *); + void (*f_sdma_init_early)(struct qib_pportdata *); + void (*f_set_cntr_sample)(struct qib_pportdata *, u32, u32); + void (*f_update_usrhead)(struct qib_ctxtdata *, u64, u32, u32, u32); + u32 (*f_hdrqempty)(struct qib_ctxtdata *); + u64 (*f_portcntr)(struct qib_pportdata *, u32); + u32 (*f_read_cntrs)(struct qib_devdata *, loff_t, char **, + u64 **); + u32 (*f_read_portcntrs)(struct qib_devdata *, loff_t, u32, + char **, u64 **); + u32 (*f_setpbc_control)(struct qib_pportdata *, u32, u8, u8); + void (*f_initvl15_bufs)(struct qib_devdata *); + void (*f_init_ctxt)(struct qib_ctxtdata *); + void (*f_txchk_change)(struct qib_devdata *, u32, u32, u32, + struct qib_ctxtdata *); + void (*f_writescratch)(struct qib_devdata *, u32); + int (*f_tempsense_rd)(struct qib_devdata *, int regnum); + + char *boardname; /* human readable board info */ + + /* template for writing TIDs */ + u64 tidtemplate; + /* value to write to free TIDs */ + u64 tidinvalid; + + /* number of registers used for pioavail */ + u32 pioavregs; + /* device (not port) flags, basically device capabilities */ + u32 flags; + /* last buffer for user use */ + u32 lastctxt_piobuf; + + /* saturating counter of (non-port-specific) device interrupts */ + u32 int_counter; + + /* pio bufs allocated per ctxt */ + u32 pbufsctxt; + /* if remainder on bufs/ctxt, ctxts < extrabuf get 1 extra */ + u32 ctxts_extrabuf; + /* + * number of ctxts configured as max; zero is set to number chip + * supports, less gives more pio bufs/ctxt, etc. + */ + u32 cfgctxts; + /* + * number of ctxts available for PSM open + */ + u32 freectxts; + + /* + * hint that we should update pioavailshadow before + * looking for a PIO buffer + */ + u32 upd_pio_shadow; + + /* internal debugging stats */ + u32 maxpkts_call; + u32 avgpkts_call; + u64 nopiobufs; + + /* PCI Vendor ID (here for NodeInfo) */ + u16 vendorid; + /* PCI Device ID (here for NodeInfo) */ + u16 deviceid; + /* for write combining settings */ + unsigned long wc_cookie; + unsigned long wc_base; + unsigned long wc_len; + + /* shadow copy of struct page *'s for exp tid pages */ + struct page **pageshadow; + /* shadow copy of dma handles for exp tid pages */ + dma_addr_t *physshadow; + u64 __iomem *egrtidbase; + spinlock_t sendctrl_lock; /* protect changes to sendctrl shadow */ + /* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */ + spinlock_t uctxt_lock; /* rcd and user context changes */ + /* + * per unit status, see also portdata statusp + * mapped readonly into user processes so they can get unit and + * IB link status cheaply + */ + u64 *devstatusp; + char *freezemsg; /* freeze msg if hw error put chip in freeze */ + u32 freezelen; /* max length of freezemsg */ + /* timer used to prevent stats overflow, error throttling, etc. */ + struct timer_list stats_timer; + + /* timer to verify interrupts work, and fallback if possible */ + struct timer_list intrchk_timer; + unsigned long ureg_align; /* user register alignment */ + + /* + * Protects pioavailshadow, pioavailkernel, pio_need_disarm, and + * pio_writing. + */ + spinlock_t pioavail_lock; + + /* + * Shadow copies of registers; size indicates read access size. + * Most of them are readonly, but some are write-only register, + * where we manipulate the bits in the shadow copy, and then write + * the shadow copy to qlogic_ib. + * + * We deliberately make most of these 32 bits, since they have + * restricted range. For any that we read, we won't to generate 32 + * bit accesses, since Opteron will generate 2 separate 32 bit HT + * transactions for a 64 bit read, and we want to avoid unnecessary + * bus transactions. + */ + + /* This is the 64 bit group */ + + unsigned long pioavailshadow[6]; + /* bitmap of send buffers available for the kernel to use with PIO. */ + unsigned long pioavailkernel[6]; + /* bitmap of send buffers which need to be disarmed. */ + unsigned long pio_need_disarm[3]; + /* bitmap of send buffers which are being written to. */ + unsigned long pio_writing[3]; + /* kr_revision shadow */ + u64 revision; + /* Base GUID for device (from eeprom, network order) */ + __be64 base_guid; + + /* + * kr_sendpiobufbase value (chip offset of pio buffers), and the + * base of the 2KB buffer s(user processes only use 2K) + */ + u64 piobufbase; + u32 pio2k_bufbase; + + /* these are the "32 bit" regs */ + + /* number of GUIDs in the flash for this interface */ + u32 nguid; + /* + * the following two are 32-bit bitmasks, but {test,clear,set}_bit + * all expect bit fields to be "unsigned long" + */ + unsigned long rcvctrl; /* shadow per device rcvctrl */ + unsigned long sendctrl; /* shadow per device sendctrl */ + + /* value we put in kr_rcvhdrcnt */ + u32 rcvhdrcnt; + /* value we put in kr_rcvhdrsize */ + u32 rcvhdrsize; + /* value we put in kr_rcvhdrentsize */ + u32 rcvhdrentsize; + /* kr_ctxtcnt value */ + u32 ctxtcnt; + /* kr_pagealign value */ + u32 palign; + /* number of "2KB" PIO buffers */ + u32 piobcnt2k; + /* size in bytes of "2KB" PIO buffers */ + u32 piosize2k; + /* max usable size in dwords of a "2KB" PIO buffer before going "4KB" */ + u32 piosize2kmax_dwords; + /* number of "4KB" PIO buffers */ + u32 piobcnt4k; + /* size in bytes of "4KB" PIO buffers */ + u32 piosize4k; + /* kr_rcvegrbase value */ + u32 rcvegrbase; + /* kr_rcvtidbase value */ + u32 rcvtidbase; + /* kr_rcvtidcnt value */ + u32 rcvtidcnt; + /* kr_userregbase */ + u32 uregbase; + /* shadow the control register contents */ + u32 control; + + /* chip address space used by 4k pio buffers */ + u32 align4k; + /* size of each rcvegrbuffer */ + u16 rcvegrbufsize; + /* log2 of above */ + u16 rcvegrbufsize_shift; + /* localbus width (1, 2,4,8,16,32) from config space */ + u32 lbus_width; + /* localbus speed in MHz */ + u32 lbus_speed; + int unit; /* unit # of this chip */ + + /* start of CHIP_SPEC move to chipspec, but need code changes */ + /* low and high portions of MSI capability/vector */ + u32 msi_lo; + /* saved after PCIe init for restore after reset */ + u32 msi_hi; + /* MSI data (vector) saved for restore */ + u16 msi_data; + /* so we can rewrite it after a chip reset */ + u32 pcibar0; + /* so we can rewrite it after a chip reset */ + u32 pcibar1; + u64 rhdrhead_intr_off; + + /* + * ASCII serial number, from flash, large enough for original + * all digit strings, and longer QLogic serial number format + */ + u8 serial[16]; + /* human readable board version */ + u8 boardversion[96]; + u8 lbus_info[32]; /* human readable localbus info */ + /* chip major rev, from qib_revision */ + u8 majrev; + /* chip minor rev, from qib_revision */ + u8 minrev; + + /* Misc small ints */ + /* Number of physical ports available */ + u8 num_pports; + /* Lowest context number which can be used by user processes */ + u8 first_user_ctxt; + u8 n_krcv_queues; + u8 qpn_mask; + u8 skip_kctxt_mask; + + u16 rhf_offset; /* offset of RHF within receive header entry */ + + /* + * GPIO pins for twsi-connected devices, and device code for eeprom + */ + u8 gpio_sda_num; + u8 gpio_scl_num; + u8 twsi_eeprom_dev; + u8 board_atten; + + /* Support (including locks) for EEPROM logging of errors and time */ + /* control access to actual counters, timer */ + spinlock_t eep_st_lock; + /* control high-level access to EEPROM */ + struct mutex eep_lock; + uint64_t traffic_wds; + /* active time is kept in seconds, but logged in hours */ + atomic_t active_time; + /* Below are nominal shadow of EEPROM, new since last EEPROM update */ + uint8_t eep_st_errs[QIB_EEP_LOG_CNT]; + uint8_t eep_st_new_errs[QIB_EEP_LOG_CNT]; + uint16_t eep_hrs; + /* + * masks for which bits of errs, hwerrs that cause + * each of the counters to increment. + */ + struct qib_eep_log_mask eep_st_masks[QIB_EEP_LOG_CNT]; + struct qib_diag_client *diag_client; + spinlock_t qib_diag_trans_lock; /* protect diag observer ops */ + struct diag_observer_list_elt *diag_observer_list; + + u8 psxmitwait_supported; + /* cycle length of PS* counters in HW (in picoseconds) */ + u16 psxmitwait_check_rate; + /* high volume overflow errors defered to tasklet */ + struct tasklet_struct error_tasklet; +}; + +/* hol_state values */ +#define QIB_HOL_UP 0 +#define QIB_HOL_INIT 1 + +#define QIB_SDMA_SENDCTRL_OP_ENABLE (1U << 0) +#define QIB_SDMA_SENDCTRL_OP_INTENABLE (1U << 1) +#define QIB_SDMA_SENDCTRL_OP_HALT (1U << 2) +#define QIB_SDMA_SENDCTRL_OP_CLEANUP (1U << 3) +#define QIB_SDMA_SENDCTRL_OP_DRAIN (1U << 4) + +/* operation types for f_txchk_change() */ +#define TXCHK_CHG_TYPE_DIS1 3 +#define TXCHK_CHG_TYPE_ENAB1 2 +#define TXCHK_CHG_TYPE_KERN 1 +#define TXCHK_CHG_TYPE_USER 0 + +#define QIB_CHASE_TIME msecs_to_jiffies(145) +#define QIB_CHASE_DIS_TIME msecs_to_jiffies(160) + +/* Private data for file operations */ +struct qib_filedata { + struct qib_ctxtdata *rcd; + unsigned subctxt; + unsigned tidcursor; + struct qib_user_sdma_queue *pq; + int rec_cpu_num; /* for cpu affinity; -1 if none */ +}; + +extern struct list_head qib_dev_list; +extern spinlock_t qib_devs_lock; +extern struct qib_devdata *qib_lookup(int unit); +extern u32 qib_cpulist_count; +extern unsigned long *qib_cpulist; + +extern unsigned qib_wc_pat; +int qib_init(struct qib_devdata *, int); +int init_chip_wc_pat(struct qib_devdata *dd, u32); +int qib_enable_wc(struct qib_devdata *dd); +void qib_disable_wc(struct qib_devdata *dd); +int qib_count_units(int *npresentp, int *nupp); +int qib_count_active_units(void); + +int qib_cdev_init(int minor, const char *name, + const struct file_operations *fops, + struct cdev **cdevp, struct device **devp); +void qib_cdev_cleanup(struct cdev **cdevp, struct device **devp); +int qib_dev_init(void); +void qib_dev_cleanup(void); + +int qib_diag_add(struct qib_devdata *); +void qib_diag_remove(struct qib_devdata *); +void qib_handle_e_ibstatuschanged(struct qib_pportdata *, u64); +void qib_sdma_update_tail(struct qib_pportdata *, u16); /* hold sdma_lock */ + +int qib_decode_err(struct qib_devdata *dd, char *buf, size_t blen, u64 err); +void qib_bad_intrstatus(struct qib_devdata *); +void qib_handle_urcv(struct qib_devdata *, u64); + +/* clean up any per-chip chip-specific stuff */ +void qib_chip_cleanup(struct qib_devdata *); +/* clean up any chip type-specific stuff */ +void qib_chip_done(void); + +/* check to see if we have to force ordering for write combining */ +int qib_unordered_wc(void); +void qib_pio_copy(void __iomem *to, const void *from, size_t count); + +void qib_disarm_piobufs(struct qib_devdata *, unsigned, unsigned); +int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *); +void qib_disarm_piobufs_set(struct qib_devdata *, unsigned long *, unsigned); +void qib_cancel_sends(struct qib_pportdata *); + +int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *); +int qib_setup_eagerbufs(struct qib_ctxtdata *); +void qib_set_ctxtcnt(struct qib_devdata *); +int qib_create_ctxts(struct qib_devdata *dd); +struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32); +void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); +void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); + +u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *); +int qib_reset_device(int); +int qib_wait_linkstate(struct qib_pportdata *, u32, int); +int qib_set_linkstate(struct qib_pportdata *, u8); +int qib_set_mtu(struct qib_pportdata *, u16); +int qib_set_lid(struct qib_pportdata *, u32, u8); +void qib_hol_down(struct qib_pportdata *); +void qib_hol_init(struct qib_pportdata *); +void qib_hol_up(struct qib_pportdata *); +void qib_hol_event(unsigned long); +void qib_disable_after_error(struct qib_devdata *); +int qib_set_uevent_bits(struct qib_pportdata *, const int); + +/* for use in system calls, where we want to know device type, etc. */ +#define ctxt_fp(fp) \ + (((struct qib_filedata *)(fp)->private_data)->rcd) +#define subctxt_fp(fp) \ + (((struct qib_filedata *)(fp)->private_data)->subctxt) +#define tidcursor_fp(fp) \ + (((struct qib_filedata *)(fp)->private_data)->tidcursor) +#define user_sdma_queue_fp(fp) \ + (((struct qib_filedata *)(fp)->private_data)->pq) + +static inline struct qib_devdata *dd_from_ppd(struct qib_pportdata *ppd) +{ + return ppd->dd; +} + +static inline struct qib_devdata *dd_from_dev(struct qib_ibdev *dev) +{ + return container_of(dev, struct qib_devdata, verbs_dev); +} + +static inline struct qib_devdata *dd_from_ibdev(struct ib_device *ibdev) +{ + return dd_from_dev(to_idev(ibdev)); +} + +static inline struct qib_pportdata *ppd_from_ibp(struct qib_ibport *ibp) +{ + return container_of(ibp, struct qib_pportdata, ibport_data); +} + +static inline struct qib_ibport *to_iport(struct ib_device *ibdev, u8 port) +{ + struct qib_devdata *dd = dd_from_ibdev(ibdev); + unsigned pidx = port - 1; /* IB number port from 1, hdw from 0 */ + + WARN_ON(pidx >= dd->num_pports); + return &dd->pport[pidx].ibport_data; +} + +/* + * values for dd->flags (_device_ related flags) and + */ +#define QIB_HAS_LINK_LATENCY 0x1 /* supports link latency (IB 1.2) */ +#define QIB_INITTED 0x2 /* chip and driver up and initted */ +#define QIB_DOING_RESET 0x4 /* in the middle of doing chip reset */ +#define QIB_PRESENT 0x8 /* chip accesses can be done */ +#define QIB_PIO_FLUSH_WC 0x10 /* Needs Write combining flush for PIO */ +#define QIB_HAS_THRESH_UPDATE 0x40 +#define QIB_HAS_SDMA_TIMEOUT 0x80 +#define QIB_USE_SPCL_TRIG 0x100 /* SpecialTrigger launch enabled */ +#define QIB_NODMA_RTAIL 0x200 /* rcvhdrtail register DMA enabled */ +#define QIB_HAS_INTX 0x800 /* Supports INTx interrupts */ +#define QIB_HAS_SEND_DMA 0x1000 /* Supports Send DMA */ +#define QIB_HAS_VLSUPP 0x2000 /* Supports multiple VLs; PBC different */ +#define QIB_HAS_HDRSUPP 0x4000 /* Supports header suppression */ +#define QIB_BADINTR 0x8000 /* severe interrupt problems */ +#define QIB_DCA_ENABLED 0x10000 /* Direct Cache Access enabled */ +#define QIB_HAS_QSFP 0x20000 /* device (card instance) has QSFP */ + +/* + * values for ppd->lflags (_ib_port_ related flags) + */ +#define QIBL_LINKV 0x1 /* IB link state valid */ +#define QIBL_LINKDOWN 0x8 /* IB link is down */ +#define QIBL_LINKINIT 0x10 /* IB link level is up */ +#define QIBL_LINKARMED 0x20 /* IB link is ARMED */ +#define QIBL_LINKACTIVE 0x40 /* IB link is ACTIVE */ +/* leave a gap for more IB-link state */ +#define QIBL_IB_AUTONEG_INPROG 0x1000 /* non-IBTA DDR/QDR neg active */ +#define QIBL_IB_AUTONEG_FAILED 0x2000 /* non-IBTA DDR/QDR neg failed */ +#define QIBL_IB_LINK_DISABLED 0x4000 /* Linkdown-disable forced, + * Do not try to bring up */ +#define QIBL_IB_FORCE_NOTIFY 0x8000 /* force notify on next ib change */ + +/* IB dword length mask in PBC (lower 11 bits); same for all chips */ +#define QIB_PBC_LENGTH_MASK ((1 << 11) - 1) + + +/* ctxt_flag bit offsets */ + /* waiting for a packet to arrive */ +#define QIB_CTXT_WAITING_RCV 2 + /* master has not finished initializing */ +#define QIB_CTXT_MASTER_UNINIT 4 + /* waiting for an urgent packet to arrive */ +#define QIB_CTXT_WAITING_URG 5 + +/* free up any allocated data at closes */ +void qib_free_data(struct qib_ctxtdata *dd); +void qib_chg_pioavailkernel(struct qib_devdata *, unsigned, unsigned, + u32, struct qib_ctxtdata *); +struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *, + const struct pci_device_id *); +struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *, + const struct pci_device_id *); +struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *, + const struct pci_device_id *); +void qib_free_devdata(struct qib_devdata *); +struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra); + +#define QIB_TWSI_NO_DEV 0xFF +/* Below qib_twsi_ functions must be called with eep_lock held */ +int qib_twsi_reset(struct qib_devdata *dd); +int qib_twsi_blk_rd(struct qib_devdata *dd, int dev, int addr, void *buffer, + int len); +int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr, + const void *buffer, int len); +void qib_get_eeprom_info(struct qib_devdata *); +int qib_update_eeprom_log(struct qib_devdata *dd); +void qib_inc_eeprom_err(struct qib_devdata *dd, u32 eidx, u32 incr); +void qib_dump_lookup_output_queue(struct qib_devdata *); +void qib_force_pio_avail_update(struct qib_devdata *); +void qib_clear_symerror_on_linkup(unsigned long opaque); + +/* + * Set LED override, only the two LSBs have "public" meaning, but + * any non-zero value substitutes them for the Link and LinkTrain + * LED states. + */ +#define QIB_LED_PHYS 1 /* Physical (linktraining) GREEN LED */ +#define QIB_LED_LOG 2 /* Logical (link) YELLOW LED */ +void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val); + +/* send dma routines */ +int qib_setup_sdma(struct qib_pportdata *); +void qib_teardown_sdma(struct qib_pportdata *); +void __qib_sdma_intr(struct qib_pportdata *); +void qib_sdma_intr(struct qib_pportdata *); +int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *, + u32, struct qib_verbs_txreq *); +/* ppd->sdma_lock should be locked before calling this. */ +int qib_sdma_make_progress(struct qib_pportdata *dd); + +/* must be called under qib_sdma_lock */ +static inline u16 qib_sdma_descq_freecnt(const struct qib_pportdata *ppd) +{ + return ppd->sdma_descq_cnt - + (ppd->sdma_descq_added - ppd->sdma_descq_removed) - 1; +} + +static inline int __qib_sdma_running(struct qib_pportdata *ppd) +{ + return ppd->sdma_state.current_state == qib_sdma_state_s99_running; +} +int qib_sdma_running(struct qib_pportdata *); + +void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); +void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); + +/* + * number of words used for protocol header if not set by qib_userinit(); + */ +#define QIB_DFLT_RCVHDRSIZE 9 + +/* + * We need to be able to handle an IB header of at least 24 dwords. + * We need the rcvhdrq large enough to handle largest IB header, but + * still have room for a 2KB MTU standard IB packet. + * Additionally, some processor/memory controller combinations + * benefit quite strongly from having the DMA'ed data be cacheline + * aligned and a cacheline multiple, so we set the size to 32 dwords + * (2 64-byte primary cachelines for pretty much all processors of + * interest). The alignment hurts nothing, other than using somewhat + * more memory. + */ +#define QIB_RCVHDR_ENTSIZE 32 + +int qib_get_user_pages(unsigned long, size_t, struct page **); +void qib_release_user_pages(struct page **, size_t); +int qib_eeprom_read(struct qib_devdata *, u8, void *, int); +int qib_eeprom_write(struct qib_devdata *, u8, const void *, int); +u32 __iomem *qib_getsendbuf_range(struct qib_devdata *, u32 *, u32, u32); +void qib_sendbuf_done(struct qib_devdata *, unsigned); + +static inline void qib_clear_rcvhdrtail(const struct qib_ctxtdata *rcd) +{ + *((u64 *) rcd->rcvhdrtail_kvaddr) = 0ULL; +} + +static inline u32 qib_get_rcvhdrtail(const struct qib_ctxtdata *rcd) +{ + /* + * volatile because it's a DMA target from the chip, routine is + * inlined, and don't want register caching or reordering. + */ + return (u32) le64_to_cpu( + *((volatile __le64 *)rcd->rcvhdrtail_kvaddr)); /* DMA'ed */ +} + +static inline u32 qib_get_hdrqtail(const struct qib_ctxtdata *rcd) +{ + const struct qib_devdata *dd = rcd->dd; + u32 hdrqtail; + + if (dd->flags & QIB_NODMA_RTAIL) { + __le32 *rhf_addr; + u32 seq; + + rhf_addr = (__le32 *) rcd->rcvhdrq + + rcd->head + dd->rhf_offset; + seq = qib_hdrget_seq(rhf_addr); + hdrqtail = rcd->head; + if (seq == rcd->seq_cnt) + hdrqtail++; + } else + hdrqtail = qib_get_rcvhdrtail(rcd); + + return hdrqtail; +} + +/* + * sysfs interface. + */ + +extern const char ib_qib_version[]; + +int qib_device_create(struct qib_devdata *); +void qib_device_remove(struct qib_devdata *); + +int qib_create_port_files(struct ib_device *ibdev, u8 port_num, + struct kobject *kobj); +int qib_verbs_register_sysfs(struct qib_devdata *); +void qib_verbs_unregister_sysfs(struct qib_devdata *); +/* Hook for sysfs read of QSFP */ +extern int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len); + +int __init qib_init_qibfs(void); +int __exit qib_exit_qibfs(void); + +int qibfs_add(struct qib_devdata *); +int qibfs_remove(struct qib_devdata *); + +int qib_pcie_init(struct pci_dev *, const struct pci_device_id *); +int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *, + const struct pci_device_id *); +void qib_pcie_ddcleanup(struct qib_devdata *); +int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct qib_msix_entry *); +int qib_reinit_intr(struct qib_devdata *); +void qib_enable_intx(struct pci_dev *); +void qib_nomsi(struct qib_devdata *); +void qib_nomsix(struct qib_devdata *); +void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *); +void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8); + +/* + * dma_addr wrappers - all 0's invalid for hw + */ +dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long, + size_t, int); +const char *qib_get_unit_name(int unit); + +/* + * Flush write combining store buffers (if present) and perform a write + * barrier. + */ +#if defined(CONFIG_X86_64) +#define qib_flush_wc() asm volatile("sfence" : : : "memory") +#else +#define qib_flush_wc() wmb() /* no reorder around wc flush */ +#endif + +/* global module parameter variables */ +extern unsigned qib_ibmtu; +extern ushort qib_cfgctxts; +extern ushort qib_num_cfg_vls; +extern ushort qib_mini_init; /* If set, do few (ideally 0) writes to chip */ +extern unsigned qib_n_krcv_queues; +extern unsigned qib_sdma_fetch_arb; +extern unsigned qib_compat_ddr_negotiate; +extern int qib_special_trigger; + +extern struct mutex qib_mutex; + +/* Number of seconds before our card status check... */ +#define STATUS_TIMEOUT 60 + +#define QIB_DRV_NAME "ib_qib" +#define QIB_USER_MINOR_BASE 0 +#define QIB_TRACE_MINOR 127 +#define QIB_DIAGPKT_MINOR 128 +#define QIB_DIAG_MINOR_BASE 129 +#define QIB_NMINORS 255 + +#define PCI_VENDOR_ID_PATHSCALE 0x1fc1 +#define PCI_VENDOR_ID_QLOGIC 0x1077 +#define PCI_DEVICE_ID_QLOGIC_IB_6120 0x10 +#define PCI_DEVICE_ID_QLOGIC_IB_7220 0x7220 +#define PCI_DEVICE_ID_QLOGIC_IB_7322 0x7322 + +/* + * qib_early_err is used (only!) to print early errors before devdata is + * allocated, or when dd->pcidev may not be valid, and at the tail end of + * cleanup when devdata may have been freed, etc. qib_dev_porterr is + * the same as qib_dev_err, but is used when the message really needs + * the IB port# to be definitive as to what's happening.. + * All of these go to the trace log, and the trace log entry is done + * first to avoid possible serial port delays from printk. + */ +#define qib_early_err(dev, fmt, ...) \ + do { \ + dev_err(dev, fmt, ##__VA_ARGS__); \ + } while (0) + +#define qib_dev_err(dd, fmt, ...) \ + do { \ + dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ + qib_get_unit_name((dd)->unit), ##__VA_ARGS__); \ + } while (0) + +#define qib_dev_porterr(dd, port, fmt, ...) \ + do { \ + dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ + qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ + ##__VA_ARGS__); \ + } while (0) + +#define qib_devinfo(pcidev, fmt, ...) \ + do { \ + dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__); \ + } while (0) + +/* + * this is used for formatting hw error messages... + */ +struct qib_hwerror_msgs { + u64 mask; + const char *msg; + size_t sz; +}; + +#define QLOGIC_IB_HWE_MSG(a, b) { .mask = a, .msg = b } + +/* in qib_intr.c... */ +void qib_format_hwerrors(u64 hwerrs, + const struct qib_hwerror_msgs *hwerrmsgs, + size_t nhwerrmsgs, char *msg, size_t lmsg); +#endif /* _QIB_KERNEL_H */ diff --git a/drivers/infiniband/hw/qib/qib_6120_regs.h b/drivers/infiniband/hw/qib/qib_6120_regs.h new file mode 100644 index 00000000..e16cb6f7 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_6120_regs.h @@ -0,0 +1,977 @@ +/* + * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* This file is mechanically generated from RTL. Any hand-edits will be lost! */ + +#define QIB_6120_Revision_OFFS 0x0 +#define QIB_6120_Revision_R_Simulator_LSB 0x3F +#define QIB_6120_Revision_R_Simulator_RMASK 0x1 +#define QIB_6120_Revision_Reserved_LSB 0x28 +#define QIB_6120_Revision_Reserved_RMASK 0x7FFFFF +#define QIB_6120_Revision_BoardID_LSB 0x20 +#define QIB_6120_Revision_BoardID_RMASK 0xFF +#define QIB_6120_Revision_R_SW_LSB 0x18 +#define QIB_6120_Revision_R_SW_RMASK 0xFF +#define QIB_6120_Revision_R_Arch_LSB 0x10 +#define QIB_6120_Revision_R_Arch_RMASK 0xFF +#define QIB_6120_Revision_R_ChipRevMajor_LSB 0x8 +#define QIB_6120_Revision_R_ChipRevMajor_RMASK 0xFF +#define QIB_6120_Revision_R_ChipRevMinor_LSB 0x0 +#define QIB_6120_Revision_R_ChipRevMinor_RMASK 0xFF + +#define QIB_6120_Control_OFFS 0x8 +#define QIB_6120_Control_TxLatency_LSB 0x4 +#define QIB_6120_Control_TxLatency_RMASK 0x1 +#define QIB_6120_Control_PCIERetryBufDiagEn_LSB 0x3 +#define QIB_6120_Control_PCIERetryBufDiagEn_RMASK 0x1 +#define QIB_6120_Control_LinkEn_LSB 0x2 +#define QIB_6120_Control_LinkEn_RMASK 0x1 +#define QIB_6120_Control_FreezeMode_LSB 0x1 +#define QIB_6120_Control_FreezeMode_RMASK 0x1 +#define QIB_6120_Control_SyncReset_LSB 0x0 +#define QIB_6120_Control_SyncReset_RMASK 0x1 + +#define QIB_6120_PageAlign_OFFS 0x10 + +#define QIB_6120_PortCnt_OFFS 0x18 + +#define QIB_6120_SendRegBase_OFFS 0x30 + +#define QIB_6120_UserRegBase_OFFS 0x38 + +#define QIB_6120_CntrRegBase_OFFS 0x40 + +#define QIB_6120_Scratch_OFFS 0x48 +#define QIB_6120_Scratch_TopHalf_LSB 0x20 +#define QIB_6120_Scratch_TopHalf_RMASK 0xFFFFFFFF +#define QIB_6120_Scratch_BottomHalf_LSB 0x0 +#define QIB_6120_Scratch_BottomHalf_RMASK 0xFFFFFFFF + +#define QIB_6120_IntBlocked_OFFS 0x60 +#define QIB_6120_IntBlocked_ErrorIntBlocked_LSB 0x1F +#define QIB_6120_IntBlocked_ErrorIntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_PioSetIntBlocked_LSB 0x1E +#define QIB_6120_IntBlocked_PioSetIntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_PioBufAvailIntBlocked_LSB 0x1D +#define QIB_6120_IntBlocked_PioBufAvailIntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_assertGPIOIntBlocked_LSB 0x1C +#define QIB_6120_IntBlocked_assertGPIOIntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_Reserved_LSB 0xF +#define QIB_6120_IntBlocked_Reserved_RMASK 0x1FFF +#define QIB_6120_IntBlocked_RcvAvail4IntBlocked_LSB 0x10 +#define QIB_6120_IntBlocked_RcvAvail4IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvAvail3IntBlocked_LSB 0xF +#define QIB_6120_IntBlocked_RcvAvail3IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvAvail2IntBlocked_LSB 0xE +#define QIB_6120_IntBlocked_RcvAvail2IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvAvail1IntBlocked_LSB 0xD +#define QIB_6120_IntBlocked_RcvAvail1IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvAvail0IntBlocked_LSB 0xC +#define QIB_6120_IntBlocked_RcvAvail0IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_Reserved1_LSB 0x5 +#define QIB_6120_IntBlocked_Reserved1_RMASK 0x7F +#define QIB_6120_IntBlocked_RcvUrg4IntBlocked_LSB 0x4 +#define QIB_6120_IntBlocked_RcvUrg4IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvUrg3IntBlocked_LSB 0x3 +#define QIB_6120_IntBlocked_RcvUrg3IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvUrg2IntBlocked_LSB 0x2 +#define QIB_6120_IntBlocked_RcvUrg2IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvUrg1IntBlocked_LSB 0x1 +#define QIB_6120_IntBlocked_RcvUrg1IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvUrg0IntBlocked_LSB 0x0 +#define QIB_6120_IntBlocked_RcvUrg0IntBlocked_RMASK 0x1 + +#define QIB_6120_IntMask_OFFS 0x68 +#define QIB_6120_IntMask_ErrorIntMask_LSB 0x1F +#define QIB_6120_IntMask_ErrorIntMask_RMASK 0x1 +#define QIB_6120_IntMask_PioSetIntMask_LSB 0x1E +#define QIB_6120_IntMask_PioSetIntMask_RMASK 0x1 +#define QIB_6120_IntMask_PioBufAvailIntMask_LSB 0x1D +#define QIB_6120_IntMask_PioBufAvailIntMask_RMASK 0x1 +#define QIB_6120_IntMask_assertGPIOIntMask_LSB 0x1C +#define QIB_6120_IntMask_assertGPIOIntMask_RMASK 0x1 +#define QIB_6120_IntMask_Reserved_LSB 0x11 +#define QIB_6120_IntMask_Reserved_RMASK 0x7FF +#define QIB_6120_IntMask_RcvAvail4IntMask_LSB 0x10 +#define QIB_6120_IntMask_RcvAvail4IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvAvail3IntMask_LSB 0xF +#define QIB_6120_IntMask_RcvAvail3IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvAvail2IntMask_LSB 0xE +#define QIB_6120_IntMask_RcvAvail2IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvAvail1IntMask_LSB 0xD +#define QIB_6120_IntMask_RcvAvail1IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvAvail0IntMask_LSB 0xC +#define QIB_6120_IntMask_RcvAvail0IntMask_RMASK 0x1 +#define QIB_6120_IntMask_Reserved1_LSB 0x5 +#define QIB_6120_IntMask_Reserved1_RMASK 0x7F +#define QIB_6120_IntMask_RcvUrg4IntMask_LSB 0x4 +#define QIB_6120_IntMask_RcvUrg4IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvUrg3IntMask_LSB 0x3 +#define QIB_6120_IntMask_RcvUrg3IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvUrg2IntMask_LSB 0x2 +#define QIB_6120_IntMask_RcvUrg2IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvUrg1IntMask_LSB 0x1 +#define QIB_6120_IntMask_RcvUrg1IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvUrg0IntMask_LSB 0x0 +#define QIB_6120_IntMask_RcvUrg0IntMask_RMASK 0x1 + +#define QIB_6120_IntStatus_OFFS 0x70 +#define QIB_6120_IntStatus_Error_LSB 0x1F +#define QIB_6120_IntStatus_Error_RMASK 0x1 +#define QIB_6120_IntStatus_PioSent_LSB 0x1E +#define QIB_6120_IntStatus_PioSent_RMASK 0x1 +#define QIB_6120_IntStatus_PioBufAvail_LSB 0x1D +#define QIB_6120_IntStatus_PioBufAvail_RMASK 0x1 +#define QIB_6120_IntStatus_assertGPIO_LSB 0x1C +#define QIB_6120_IntStatus_assertGPIO_RMASK 0x1 +#define QIB_6120_IntStatus_Reserved_LSB 0xF +#define QIB_6120_IntStatus_Reserved_RMASK 0x1FFF +#define QIB_6120_IntStatus_RcvAvail4_LSB 0x10 +#define QIB_6120_IntStatus_RcvAvail4_RMASK 0x1 +#define QIB_6120_IntStatus_RcvAvail3_LSB 0xF +#define QIB_6120_IntStatus_RcvAvail3_RMASK 0x1 +#define QIB_6120_IntStatus_RcvAvail2_LSB 0xE +#define QIB_6120_IntStatus_RcvAvail2_RMASK 0x1 +#define QIB_6120_IntStatus_RcvAvail1_LSB 0xD +#define QIB_6120_IntStatus_RcvAvail1_RMASK 0x1 +#define QIB_6120_IntStatus_RcvAvail0_LSB 0xC +#define QIB_6120_IntStatus_RcvAvail0_RMASK 0x1 +#define QIB_6120_IntStatus_Reserved1_LSB 0x5 +#define QIB_6120_IntStatus_Reserved1_RMASK 0x7F +#define QIB_6120_IntStatus_RcvUrg4_LSB 0x4 +#define QIB_6120_IntStatus_RcvUrg4_RMASK 0x1 +#define QIB_6120_IntStatus_RcvUrg3_LSB 0x3 +#define QIB_6120_IntStatus_RcvUrg3_RMASK 0x1 +#define QIB_6120_IntStatus_RcvUrg2_LSB 0x2 +#define QIB_6120_IntStatus_RcvUrg2_RMASK 0x1 +#define QIB_6120_IntStatus_RcvUrg1_LSB 0x1 +#define QIB_6120_IntStatus_RcvUrg1_RMASK 0x1 +#define QIB_6120_IntStatus_RcvUrg0_LSB 0x0 +#define QIB_6120_IntStatus_RcvUrg0_RMASK 0x1 + +#define QIB_6120_IntClear_OFFS 0x78 +#define QIB_6120_IntClear_ErrorIntClear_LSB 0x1F +#define QIB_6120_IntClear_ErrorIntClear_RMASK 0x1 +#define QIB_6120_IntClear_PioSetIntClear_LSB 0x1E +#define QIB_6120_IntClear_PioSetIntClear_RMASK 0x1 +#define QIB_6120_IntClear_PioBufAvailIntClear_LSB 0x1D +#define QIB_6120_IntClear_PioBufAvailIntClear_RMASK 0x1 +#define QIB_6120_IntClear_assertGPIOIntClear_LSB 0x1C +#define QIB_6120_IntClear_assertGPIOIntClear_RMASK 0x1 +#define QIB_6120_IntClear_Reserved_LSB 0xF +#define QIB_6120_IntClear_Reserved_RMASK 0x1FFF +#define QIB_6120_IntClear_RcvAvail4IntClear_LSB 0x10 +#define QIB_6120_IntClear_RcvAvail4IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvAvail3IntClear_LSB 0xF +#define QIB_6120_IntClear_RcvAvail3IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvAvail2IntClear_LSB 0xE +#define QIB_6120_IntClear_RcvAvail2IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvAvail1IntClear_LSB 0xD +#define QIB_6120_IntClear_RcvAvail1IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvAvail0IntClear_LSB 0xC +#define QIB_6120_IntClear_RcvAvail0IntClear_RMASK 0x1 +#define QIB_6120_IntClear_Reserved1_LSB 0x5 +#define QIB_6120_IntClear_Reserved1_RMASK 0x7F +#define QIB_6120_IntClear_RcvUrg4IntClear_LSB 0x4 +#define QIB_6120_IntClear_RcvUrg4IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvUrg3IntClear_LSB 0x3 +#define QIB_6120_IntClear_RcvUrg3IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvUrg2IntClear_LSB 0x2 +#define QIB_6120_IntClear_RcvUrg2IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvUrg1IntClear_LSB 0x1 +#define QIB_6120_IntClear_RcvUrg1IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvUrg0IntClear_LSB 0x0 +#define QIB_6120_IntClear_RcvUrg0IntClear_RMASK 0x1 + +#define QIB_6120_ErrMask_OFFS 0x80 +#define QIB_6120_ErrMask_Reserved_LSB 0x34 +#define QIB_6120_ErrMask_Reserved_RMASK 0xFFF +#define QIB_6120_ErrMask_HardwareErrMask_LSB 0x33 +#define QIB_6120_ErrMask_HardwareErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_ResetNegatedMask_LSB 0x32 +#define QIB_6120_ErrMask_ResetNegatedMask_RMASK 0x1 +#define QIB_6120_ErrMask_InvalidAddrErrMask_LSB 0x31 +#define QIB_6120_ErrMask_InvalidAddrErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_IBStatusChangedMask_LSB 0x30 +#define QIB_6120_ErrMask_IBStatusChangedMask_RMASK 0x1 +#define QIB_6120_ErrMask_Reserved1_LSB 0x26 +#define QIB_6120_ErrMask_Reserved1_RMASK 0x3FF +#define QIB_6120_ErrMask_SendUnsupportedVLErrMask_LSB 0x25 +#define QIB_6120_ErrMask_SendUnsupportedVLErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendUnexpectedPktNumErrMask_LSB 0x24 +#define QIB_6120_ErrMask_SendUnexpectedPktNumErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendPioArmLaunchErrMask_LSB 0x23 +#define QIB_6120_ErrMask_SendPioArmLaunchErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendDroppedDataPktErrMask_LSB 0x22 +#define QIB_6120_ErrMask_SendDroppedDataPktErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendDroppedSmpPktErrMask_LSB 0x21 +#define QIB_6120_ErrMask_SendDroppedSmpPktErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendPktLenErrMask_LSB 0x20 +#define QIB_6120_ErrMask_SendPktLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendUnderRunErrMask_LSB 0x1F +#define QIB_6120_ErrMask_SendUnderRunErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendMaxPktLenErrMask_LSB 0x1E +#define QIB_6120_ErrMask_SendMaxPktLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendMinPktLenErrMask_LSB 0x1D +#define QIB_6120_ErrMask_SendMinPktLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_Reserved2_LSB 0x12 +#define QIB_6120_ErrMask_Reserved2_RMASK 0x7FF +#define QIB_6120_ErrMask_RcvIBLostLinkErrMask_LSB 0x11 +#define QIB_6120_ErrMask_RcvIBLostLinkErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvHdrErrMask_LSB 0x10 +#define QIB_6120_ErrMask_RcvHdrErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvHdrLenErrMask_LSB 0xF +#define QIB_6120_ErrMask_RcvHdrLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvBadTidErrMask_LSB 0xE +#define QIB_6120_ErrMask_RcvBadTidErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvHdrFullErrMask_LSB 0xD +#define QIB_6120_ErrMask_RcvHdrFullErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvEgrFullErrMask_LSB 0xC +#define QIB_6120_ErrMask_RcvEgrFullErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvBadVersionErrMask_LSB 0xB +#define QIB_6120_ErrMask_RcvBadVersionErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvIBFlowErrMask_LSB 0xA +#define QIB_6120_ErrMask_RcvIBFlowErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvEBPErrMask_LSB 0x9 +#define QIB_6120_ErrMask_RcvEBPErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvUnsupportedVLErrMask_LSB 0x8 +#define QIB_6120_ErrMask_RcvUnsupportedVLErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvUnexpectedCharErrMask_LSB 0x7 +#define QIB_6120_ErrMask_RcvUnexpectedCharErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvShortPktLenErrMask_LSB 0x6 +#define QIB_6120_ErrMask_RcvShortPktLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvLongPktLenErrMask_LSB 0x5 +#define QIB_6120_ErrMask_RcvLongPktLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvMaxPktLenErrMask_LSB 0x4 +#define QIB_6120_ErrMask_RcvMaxPktLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvMinPktLenErrMask_LSB 0x3 +#define QIB_6120_ErrMask_RcvMinPktLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvICRCErrMask_LSB 0x2 +#define QIB_6120_ErrMask_RcvICRCErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvVCRCErrMask_LSB 0x1 +#define QIB_6120_ErrMask_RcvVCRCErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvFormatErrMask_LSB 0x0 +#define QIB_6120_ErrMask_RcvFormatErrMask_RMASK 0x1 + +#define QIB_6120_ErrStatus_OFFS 0x88 +#define QIB_6120_ErrStatus_Reserved_LSB 0x34 +#define QIB_6120_ErrStatus_Reserved_RMASK 0xFFF +#define QIB_6120_ErrStatus_HardwareErr_LSB 0x33 +#define QIB_6120_ErrStatus_HardwareErr_RMASK 0x1 +#define QIB_6120_ErrStatus_ResetNegated_LSB 0x32 +#define QIB_6120_ErrStatus_ResetNegated_RMASK 0x1 +#define QIB_6120_ErrStatus_InvalidAddrErr_LSB 0x31 +#define QIB_6120_ErrStatus_InvalidAddrErr_RMASK 0x1 +#define QIB_6120_ErrStatus_IBStatusChanged_LSB 0x30 +#define QIB_6120_ErrStatus_IBStatusChanged_RMASK 0x1 +#define QIB_6120_ErrStatus_Reserved1_LSB 0x26 +#define QIB_6120_ErrStatus_Reserved1_RMASK 0x3FF +#define QIB_6120_ErrStatus_SendUnsupportedVLErr_LSB 0x25 +#define QIB_6120_ErrStatus_SendUnsupportedVLErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendUnexpectedPktNumErr_LSB 0x24 +#define QIB_6120_ErrStatus_SendUnexpectedPktNumErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendPioArmLaunchErr_LSB 0x23 +#define QIB_6120_ErrStatus_SendPioArmLaunchErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendDroppedDataPktErr_LSB 0x22 +#define QIB_6120_ErrStatus_SendDroppedDataPktErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendDroppedSmpPktErr_LSB 0x21 +#define QIB_6120_ErrStatus_SendDroppedSmpPktErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendPktLenErr_LSB 0x20 +#define QIB_6120_ErrStatus_SendPktLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendUnderRunErr_LSB 0x1F +#define QIB_6120_ErrStatus_SendUnderRunErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendMaxPktLenErr_LSB 0x1E +#define QIB_6120_ErrStatus_SendMaxPktLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendMinPktLenErr_LSB 0x1D +#define QIB_6120_ErrStatus_SendMinPktLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_Reserved2_LSB 0x12 +#define QIB_6120_ErrStatus_Reserved2_RMASK 0x7FF +#define QIB_6120_ErrStatus_RcvIBLostLinkErr_LSB 0x11 +#define QIB_6120_ErrStatus_RcvIBLostLinkErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvHdrErr_LSB 0x10 +#define QIB_6120_ErrStatus_RcvHdrErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvHdrLenErr_LSB 0xF +#define QIB_6120_ErrStatus_RcvHdrLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvBadTidErr_LSB 0xE +#define QIB_6120_ErrStatus_RcvBadTidErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvHdrFullErr_LSB 0xD +#define QIB_6120_ErrStatus_RcvHdrFullErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvEgrFullErr_LSB 0xC +#define QIB_6120_ErrStatus_RcvEgrFullErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvBadVersionErr_LSB 0xB +#define QIB_6120_ErrStatus_RcvBadVersionErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvIBFlowErr_LSB 0xA +#define QIB_6120_ErrStatus_RcvIBFlowErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvEBPErr_LSB 0x9 +#define QIB_6120_ErrStatus_RcvEBPErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvUnsupportedVLErr_LSB 0x8 +#define QIB_6120_ErrStatus_RcvUnsupportedVLErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvUnexpectedCharErr_LSB 0x7 +#define QIB_6120_ErrStatus_RcvUnexpectedCharErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvShortPktLenErr_LSB 0x6 +#define QIB_6120_ErrStatus_RcvShortPktLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvLongPktLenErr_LSB 0x5 +#define QIB_6120_ErrStatus_RcvLongPktLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvMaxPktLenErr_LSB 0x4 +#define QIB_6120_ErrStatus_RcvMaxPktLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvMinPktLenErr_LSB 0x3 +#define QIB_6120_ErrStatus_RcvMinPktLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvICRCErr_LSB 0x2 +#define QIB_6120_ErrStatus_RcvICRCErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvVCRCErr_LSB 0x1 +#define QIB_6120_ErrStatus_RcvVCRCErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvFormatErr_LSB 0x0 +#define QIB_6120_ErrStatus_RcvFormatErr_RMASK 0x1 + +#define QIB_6120_ErrClear_OFFS 0x90 +#define QIB_6120_ErrClear_Reserved_LSB 0x34 +#define QIB_6120_ErrClear_Reserved_RMASK 0xFFF +#define QIB_6120_ErrClear_HardwareErrClear_LSB 0x33 +#define QIB_6120_ErrClear_HardwareErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_ResetNegatedClear_LSB 0x32 +#define QIB_6120_ErrClear_ResetNegatedClear_RMASK 0x1 +#define QIB_6120_ErrClear_InvalidAddrErrClear_LSB 0x31 +#define QIB_6120_ErrClear_InvalidAddrErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_IBStatusChangedClear_LSB 0x30 +#define QIB_6120_ErrClear_IBStatusChangedClear_RMASK 0x1 +#define QIB_6120_ErrClear_Reserved1_LSB 0x26 +#define QIB_6120_ErrClear_Reserved1_RMASK 0x3FF +#define QIB_6120_ErrClear_SendUnsupportedVLErrClear_LSB 0x25 +#define QIB_6120_ErrClear_SendUnsupportedVLErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendUnexpectedPktNumErrClear_LSB 0x24 +#define QIB_6120_ErrClear_SendUnexpectedPktNumErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendPioArmLaunchErrClear_LSB 0x23 +#define QIB_6120_ErrClear_SendPioArmLaunchErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendDroppedDataPktErrClear_LSB 0x22 +#define QIB_6120_ErrClear_SendDroppedDataPktErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendDroppedSmpPktErrClear_LSB 0x21 +#define QIB_6120_ErrClear_SendDroppedSmpPktErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendPktLenErrClear_LSB 0x20 +#define QIB_6120_ErrClear_SendPktLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendUnderRunErrClear_LSB 0x1F +#define QIB_6120_ErrClear_SendUnderRunErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendMaxPktLenErrClear_LSB 0x1E +#define QIB_6120_ErrClear_SendMaxPktLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendMinPktLenErrClear_LSB 0x1D +#define QIB_6120_ErrClear_SendMinPktLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_Reserved2_LSB 0x12 +#define QIB_6120_ErrClear_Reserved2_RMASK 0x7FF +#define QIB_6120_ErrClear_RcvIBLostLinkErrClear_LSB 0x11 +#define QIB_6120_ErrClear_RcvIBLostLinkErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvHdrErrClear_LSB 0x10 +#define QIB_6120_ErrClear_RcvHdrErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvHdrLenErrClear_LSB 0xF +#define QIB_6120_ErrClear_RcvHdrLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvBadTidErrClear_LSB 0xE +#define QIB_6120_ErrClear_RcvBadTidErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvHdrFullErrClear_LSB 0xD +#define QIB_6120_ErrClear_RcvHdrFullErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvEgrFullErrClear_LSB 0xC +#define QIB_6120_ErrClear_RcvEgrFullErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvBadVersionErrClear_LSB 0xB +#define QIB_6120_ErrClear_RcvBadVersionErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvIBFlowErrClear_LSB 0xA +#define QIB_6120_ErrClear_RcvIBFlowErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvEBPErrClear_LSB 0x9 +#define QIB_6120_ErrClear_RcvEBPErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvUnsupportedVLErrClear_LSB 0x8 +#define QIB_6120_ErrClear_RcvUnsupportedVLErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvUnexpectedCharErrClear_LSB 0x7 +#define QIB_6120_ErrClear_RcvUnexpectedCharErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvShortPktLenErrClear_LSB 0x6 +#define QIB_6120_ErrClear_RcvShortPktLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvLongPktLenErrClear_LSB 0x5 +#define QIB_6120_ErrClear_RcvLongPktLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvMaxPktLenErrClear_LSB 0x4 +#define QIB_6120_ErrClear_RcvMaxPktLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvMinPktLenErrClear_LSB 0x3 +#define QIB_6120_ErrClear_RcvMinPktLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvICRCErrClear_LSB 0x2 +#define QIB_6120_ErrClear_RcvICRCErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvVCRCErrClear_LSB 0x1 +#define QIB_6120_ErrClear_RcvVCRCErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvFormatErrClear_LSB 0x0 +#define QIB_6120_ErrClear_RcvFormatErrClear_RMASK 0x1 + +#define QIB_6120_HwErrMask_OFFS 0x98 +#define QIB_6120_HwErrMask_IBCBusFromSPCParityErrMask_LSB 0x3F +#define QIB_6120_HwErrMask_IBCBusFromSPCParityErrMask_RMASK 0x1 +#define QIB_6120_HwErrMask_IBCBusToSPCParityErrMask_LSB 0x3E +#define QIB_6120_HwErrMask_IBCBusToSPCParityErrMask_RMASK 0x1 +#define QIB_6120_HwErrMask_Reserved_LSB 0x3D +#define QIB_6120_HwErrMask_Reserved_RMASK 0x1 +#define QIB_6120_HwErrMask_IBSerdesPClkNotDetectMask_LSB 0x3C +#define QIB_6120_HwErrMask_IBSerdesPClkNotDetectMask_RMASK 0x1 +#define QIB_6120_HwErrMask_PCIESerdesQ0PClkNotDetectMask_LSB 0x3B +#define QIB_6120_HwErrMask_PCIESerdesQ0PClkNotDetectMask_RMASK 0x1 +#define QIB_6120_HwErrMask_PCIESerdesQ1PClkNotDetectMask_LSB 0x3A +#define QIB_6120_HwErrMask_PCIESerdesQ1PClkNotDetectMask_RMASK 0x1 +#define QIB_6120_HwErrMask_Reserved1_LSB 0x39 +#define QIB_6120_HwErrMask_Reserved1_RMASK 0x1 +#define QIB_6120_HwErrMask_IBPLLrfSlipMask_LSB 0x38 +#define QIB_6120_HwErrMask_IBPLLrfSlipMask_RMASK 0x1 +#define QIB_6120_HwErrMask_IBPLLfbSlipMask_LSB 0x37 +#define QIB_6120_HwErrMask_IBPLLfbSlipMask_RMASK 0x1 +#define QIB_6120_HwErrMask_PowerOnBISTFailedMask_LSB 0x36 +#define QIB_6120_HwErrMask_PowerOnBISTFailedMask_RMASK 0x1 +#define QIB_6120_HwErrMask_Reserved2_LSB 0x33 +#define QIB_6120_HwErrMask_Reserved2_RMASK 0x7 +#define QIB_6120_HwErrMask_RXEMemParityErrMask_LSB 0x2C +#define QIB_6120_HwErrMask_RXEMemParityErrMask_RMASK 0x7F +#define QIB_6120_HwErrMask_TXEMemParityErrMask_LSB 0x28 +#define QIB_6120_HwErrMask_TXEMemParityErrMask_RMASK 0xF +#define QIB_6120_HwErrMask_Reserved3_LSB 0x22 +#define QIB_6120_HwErrMask_Reserved3_RMASK 0x3F +#define QIB_6120_HwErrMask_PCIeBusParityErrMask_LSB 0x1F +#define QIB_6120_HwErrMask_PCIeBusParityErrMask_RMASK 0x7 +#define QIB_6120_HwErrMask_PcieCplTimeoutMask_LSB 0x1E +#define QIB_6120_HwErrMask_PcieCplTimeoutMask_RMASK 0x1 +#define QIB_6120_HwErrMask_PoisonedTLPMask_LSB 0x1D +#define QIB_6120_HwErrMask_PoisonedTLPMask_RMASK 0x1 +#define QIB_6120_HwErrMask_Reserved4_LSB 0x6 +#define QIB_6120_HwErrMask_Reserved4_RMASK 0x7FFFFF +#define QIB_6120_HwErrMask_PCIeMemParityErrMask_LSB 0x0 +#define QIB_6120_HwErrMask_PCIeMemParityErrMask_RMASK 0x3F + +#define QIB_6120_HwErrStatus_OFFS 0xA0 +#define QIB_6120_HwErrStatus_IBCBusFromSPCParityErr_LSB 0x3F +#define QIB_6120_HwErrStatus_IBCBusFromSPCParityErr_RMASK 0x1 +#define QIB_6120_HwErrStatus_IBCBusToSPCParityErr_LSB 0x3E +#define QIB_6120_HwErrStatus_IBCBusToSPCParityErr_RMASK 0x1 +#define QIB_6120_HwErrStatus_Reserved_LSB 0x3D +#define QIB_6120_HwErrStatus_Reserved_RMASK 0x1 +#define QIB_6120_HwErrStatus_IBSerdesPClkNotDetect_LSB 0x3C +#define QIB_6120_HwErrStatus_IBSerdesPClkNotDetect_RMASK 0x1 +#define QIB_6120_HwErrStatus_PCIESerdesQ0PClkNotDetect_LSB 0x3B +#define QIB_6120_HwErrStatus_PCIESerdesQ0PClkNotDetect_RMASK 0x1 +#define QIB_6120_HwErrStatus_PCIESerdesQ1PClkNotDetect_LSB 0x3A +#define QIB_6120_HwErrStatus_PCIESerdesQ1PClkNotDetect_RMASK 0x1 +#define QIB_6120_HwErrStatus_Reserved1_LSB 0x39 +#define QIB_6120_HwErrStatus_Reserved1_RMASK 0x1 +#define QIB_6120_HwErrStatus_IBPLLrfSlip_LSB 0x38 +#define QIB_6120_HwErrStatus_IBPLLrfSlip_RMASK 0x1 +#define QIB_6120_HwErrStatus_IBPLLfbSlip_LSB 0x37 +#define QIB_6120_HwErrStatus_IBPLLfbSlip_RMASK 0x1 +#define QIB_6120_HwErrStatus_PowerOnBISTFailed_LSB 0x36 +#define QIB_6120_HwErrStatus_PowerOnBISTFailed_RMASK 0x1 +#define QIB_6120_HwErrStatus_Reserved2_LSB 0x33 +#define QIB_6120_HwErrStatus_Reserved2_RMASK 0x7 +#define QIB_6120_HwErrStatus_RXEMemParity_LSB 0x2C +#define QIB_6120_HwErrStatus_RXEMemParity_RMASK 0x7F +#define QIB_6120_HwErrStatus_TXEMemParity_LSB 0x28 +#define QIB_6120_HwErrStatus_TXEMemParity_RMASK 0xF +#define QIB_6120_HwErrStatus_Reserved3_LSB 0x22 +#define QIB_6120_HwErrStatus_Reserved3_RMASK 0x3F +#define QIB_6120_HwErrStatus_PCIeBusParity_LSB 0x1F +#define QIB_6120_HwErrStatus_PCIeBusParity_RMASK 0x7 +#define QIB_6120_HwErrStatus_PcieCplTimeout_LSB 0x1E +#define QIB_6120_HwErrStatus_PcieCplTimeout_RMASK 0x1 +#define QIB_6120_HwErrStatus_PoisenedTLP_LSB 0x1D +#define QIB_6120_HwErrStatus_PoisenedTLP_RMASK 0x1 +#define QIB_6120_HwErrStatus_Reserved4_LSB 0x6 +#define QIB_6120_HwErrStatus_Reserved4_RMASK 0x7FFFFF +#define QIB_6120_HwErrStatus_PCIeMemParity_LSB 0x0 +#define QIB_6120_HwErrStatus_PCIeMemParity_RMASK 0x3F + +#define QIB_6120_HwErrClear_OFFS 0xA8 +#define QIB_6120_HwErrClear_IBCBusFromSPCParityErrClear_LSB 0x3F +#define QIB_6120_HwErrClear_IBCBusFromSPCParityErrClear_RMASK 0x1 +#define QIB_6120_HwErrClear_IBCBusToSPCparityErrClear_LSB 0x3E +#define QIB_6120_HwErrClear_IBCBusToSPCparityErrClear_RMASK 0x1 +#define QIB_6120_HwErrClear_Reserved_LSB 0x3D +#define QIB_6120_HwErrClear_Reserved_RMASK 0x1 +#define QIB_6120_HwErrClear_IBSerdesPClkNotDetectClear_LSB 0x3C +#define QIB_6120_HwErrClear_IBSerdesPClkNotDetectClear_RMASK 0x1 +#define QIB_6120_HwErrClear_PCIESerdesQ0PClkNotDetectClear_LSB 0x3B +#define QIB_6120_HwErrClear_PCIESerdesQ0PClkNotDetectClear_RMASK 0x1 +#define QIB_6120_HwErrClear_PCIESerdesQ1PClkNotDetectClear_LSB 0x3A +#define QIB_6120_HwErrClear_PCIESerdesQ1PClkNotDetectClear_RMASK 0x1 +#define QIB_6120_HwErrClear_Reserved1_LSB 0x39 +#define QIB_6120_HwErrClear_Reserved1_RMASK 0x1 +#define QIB_6120_HwErrClear_IBPLLrfSlipClear_LSB 0x38 +#define QIB_6120_HwErrClear_IBPLLrfSlipClear_RMASK 0x1 +#define QIB_6120_HwErrClear_IBPLLfbSlipClear_LSB 0x37 +#define QIB_6120_HwErrClear_IBPLLfbSlipClear_RMASK 0x1 +#define QIB_6120_HwErrClear_PowerOnBISTFailedClear_LSB 0x36 +#define QIB_6120_HwErrClear_PowerOnBISTFailedClear_RMASK 0x1 +#define QIB_6120_HwErrClear_Reserved2_LSB 0x33 +#define QIB_6120_HwErrClear_Reserved2_RMASK 0x7 +#define QIB_6120_HwErrClear_RXEMemParityClear_LSB 0x2C +#define QIB_6120_HwErrClear_RXEMemParityClear_RMASK 0x7F +#define QIB_6120_HwErrClear_TXEMemParityClear_LSB 0x28 +#define QIB_6120_HwErrClear_TXEMemParityClear_RMASK 0xF +#define QIB_6120_HwErrClear_Reserved3_LSB 0x22 +#define QIB_6120_HwErrClear_Reserved3_RMASK 0x3F +#define QIB_6120_HwErrClear_PCIeBusParityClr_LSB 0x1F +#define QIB_6120_HwErrClear_PCIeBusParityClr_RMASK 0x7 +#define QIB_6120_HwErrClear_PcieCplTimeoutClear_LSB 0x1E +#define QIB_6120_HwErrClear_PcieCplTimeoutClear_RMASK 0x1 +#define QIB_6120_HwErrClear_PoisonedTLPClear_LSB 0x1D +#define QIB_6120_HwErrClear_PoisonedTLPClear_RMASK 0x1 +#define QIB_6120_HwErrClear_Reserved4_LSB 0x6 +#define QIB_6120_HwErrClear_Reserved4_RMASK 0x7FFFFF +#define QIB_6120_HwErrClear_PCIeMemParityClr_LSB 0x0 +#define QIB_6120_HwErrClear_PCIeMemParityClr_RMASK 0x3F + +#define QIB_6120_HwDiagCtrl_OFFS 0xB0 +#define QIB_6120_HwDiagCtrl_ForceIBCBusFromSPCParityErr_LSB 0x3F +#define QIB_6120_HwDiagCtrl_ForceIBCBusFromSPCParityErr_RMASK 0x1 +#define QIB_6120_HwDiagCtrl_ForceIBCBusToSPCParityErr_LSB 0x3E +#define QIB_6120_HwDiagCtrl_ForceIBCBusToSPCParityErr_RMASK 0x1 +#define QIB_6120_HwDiagCtrl_CounterWrEnable_LSB 0x3D +#define QIB_6120_HwDiagCtrl_CounterWrEnable_RMASK 0x1 +#define QIB_6120_HwDiagCtrl_CounterDisable_LSB 0x3C +#define QIB_6120_HwDiagCtrl_CounterDisable_RMASK 0x1 +#define QIB_6120_HwDiagCtrl_Reserved_LSB 0x33 +#define QIB_6120_HwDiagCtrl_Reserved_RMASK 0x1FF +#define QIB_6120_HwDiagCtrl_ForceRxMemParityErr_LSB 0x2C +#define QIB_6120_HwDiagCtrl_ForceRxMemParityErr_RMASK 0x7F +#define QIB_6120_HwDiagCtrl_ForceTxMemparityErr_LSB 0x28 +#define QIB_6120_HwDiagCtrl_ForceTxMemparityErr_RMASK 0xF +#define QIB_6120_HwDiagCtrl_Reserved1_LSB 0x23 +#define QIB_6120_HwDiagCtrl_Reserved1_RMASK 0x1F +#define QIB_6120_HwDiagCtrl_forcePCIeBusParity_LSB 0x1F +#define QIB_6120_HwDiagCtrl_forcePCIeBusParity_RMASK 0xF +#define QIB_6120_HwDiagCtrl_Reserved2_LSB 0x6 +#define QIB_6120_HwDiagCtrl_Reserved2_RMASK 0x1FFFFFF +#define QIB_6120_HwDiagCtrl_forcePCIeMemParity_LSB 0x0 +#define QIB_6120_HwDiagCtrl_forcePCIeMemParity_RMASK 0x3F + +#define QIB_6120_IBCStatus_OFFS 0xC0 +#define QIB_6120_IBCStatus_TxCreditOk_LSB 0x1F +#define QIB_6120_IBCStatus_TxCreditOk_RMASK 0x1 +#define QIB_6120_IBCStatus_TxReady_LSB 0x1E +#define QIB_6120_IBCStatus_TxReady_RMASK 0x1 +#define QIB_6120_IBCStatus_Reserved_LSB 0x7 +#define QIB_6120_IBCStatus_Reserved_RMASK 0x7FFFFF +#define QIB_6120_IBCStatus_LinkState_LSB 0x4 +#define QIB_6120_IBCStatus_LinkState_RMASK 0x7 +#define QIB_6120_IBCStatus_LinkTrainingState_LSB 0x0 +#define QIB_6120_IBCStatus_LinkTrainingState_RMASK 0xF + +#define QIB_6120_IBCCtrl_OFFS 0xC8 +#define QIB_6120_IBCCtrl_Loopback_LSB 0x3F +#define QIB_6120_IBCCtrl_Loopback_RMASK 0x1 +#define QIB_6120_IBCCtrl_LinkDownDefaultState_LSB 0x3E +#define QIB_6120_IBCCtrl_LinkDownDefaultState_RMASK 0x1 +#define QIB_6120_IBCCtrl_Reserved_LSB 0x2B +#define QIB_6120_IBCCtrl_Reserved_RMASK 0x7FFFF +#define QIB_6120_IBCCtrl_CreditScale_LSB 0x28 +#define QIB_6120_IBCCtrl_CreditScale_RMASK 0x7 +#define QIB_6120_IBCCtrl_OverrunThreshold_LSB 0x24 +#define QIB_6120_IBCCtrl_OverrunThreshold_RMASK 0xF +#define QIB_6120_IBCCtrl_PhyerrThreshold_LSB 0x20 +#define QIB_6120_IBCCtrl_PhyerrThreshold_RMASK 0xF +#define QIB_6120_IBCCtrl_Reserved1_LSB 0x1F +#define QIB_6120_IBCCtrl_Reserved1_RMASK 0x1 +#define QIB_6120_IBCCtrl_MaxPktLen_LSB 0x14 +#define QIB_6120_IBCCtrl_MaxPktLen_RMASK 0x7FF +#define QIB_6120_IBCCtrl_LinkCmd_LSB 0x12 +#define QIB_6120_IBCCtrl_LinkCmd_RMASK 0x3 +#define QIB_6120_IBCCtrl_LinkInitCmd_LSB 0x10 +#define QIB_6120_IBCCtrl_LinkInitCmd_RMASK 0x3 +#define QIB_6120_IBCCtrl_FlowCtrlWaterMark_LSB 0x8 +#define QIB_6120_IBCCtrl_FlowCtrlWaterMark_RMASK 0xFF +#define QIB_6120_IBCCtrl_FlowCtrlPeriod_LSB 0x0 +#define QIB_6120_IBCCtrl_FlowCtrlPeriod_RMASK 0xFF + +#define QIB_6120_EXTStatus_OFFS 0xD0 +#define QIB_6120_EXTStatus_GPIOIn_LSB 0x30 +#define QIB_6120_EXTStatus_GPIOIn_RMASK 0xFFFF +#define QIB_6120_EXTStatus_Reserved_LSB 0x20 +#define QIB_6120_EXTStatus_Reserved_RMASK 0xFFFF +#define QIB_6120_EXTStatus_Reserved1_LSB 0x10 +#define QIB_6120_EXTStatus_Reserved1_RMASK 0xFFFF +#define QIB_6120_EXTStatus_MemBISTFoundErr_LSB 0xF +#define QIB_6120_EXTStatus_MemBISTFoundErr_RMASK 0x1 +#define QIB_6120_EXTStatus_MemBISTEndTest_LSB 0xE +#define QIB_6120_EXTStatus_MemBISTEndTest_RMASK 0x1 +#define QIB_6120_EXTStatus_Reserved2_LSB 0x0 +#define QIB_6120_EXTStatus_Reserved2_RMASK 0x3FFF + +#define QIB_6120_EXTCtrl_OFFS 0xD8 +#define QIB_6120_EXTCtrl_GPIOOe_LSB 0x30 +#define QIB_6120_EXTCtrl_GPIOOe_RMASK 0xFFFF +#define QIB_6120_EXTCtrl_GPIOInvert_LSB 0x20 +#define QIB_6120_EXTCtrl_GPIOInvert_RMASK 0xFFFF +#define QIB_6120_EXTCtrl_Reserved_LSB 0x4 +#define QIB_6120_EXTCtrl_Reserved_RMASK 0xFFFFFFF +#define QIB_6120_EXTCtrl_LEDPriPortGreenOn_LSB 0x3 +#define QIB_6120_EXTCtrl_LEDPriPortGreenOn_RMASK 0x1 +#define QIB_6120_EXTCtrl_LEDPriPortYellowOn_LSB 0x2 +#define QIB_6120_EXTCtrl_LEDPriPortYellowOn_RMASK 0x1 +#define QIB_6120_EXTCtrl_LEDGblOkGreenOn_LSB 0x1 +#define QIB_6120_EXTCtrl_LEDGblOkGreenOn_RMASK 0x1 +#define QIB_6120_EXTCtrl_LEDGblErrRedOff_LSB 0x0 +#define QIB_6120_EXTCtrl_LEDGblErrRedOff_RMASK 0x1 + +#define QIB_6120_GPIOOut_OFFS 0xE0 + +#define QIB_6120_GPIOMask_OFFS 0xE8 + +#define QIB_6120_GPIOStatus_OFFS 0xF0 + +#define QIB_6120_GPIOClear_OFFS 0xF8 + +#define QIB_6120_RcvCtrl_OFFS 0x100 +#define QIB_6120_RcvCtrl_TailUpd_LSB 0x1F +#define QIB_6120_RcvCtrl_TailUpd_RMASK 0x1 +#define QIB_6120_RcvCtrl_RcvPartitionKeyDisable_LSB 0x1E +#define QIB_6120_RcvCtrl_RcvPartitionKeyDisable_RMASK 0x1 +#define QIB_6120_RcvCtrl_Reserved_LSB 0x15 +#define QIB_6120_RcvCtrl_Reserved_RMASK 0x1FF +#define QIB_6120_RcvCtrl_IntrAvail_LSB 0x10 +#define QIB_6120_RcvCtrl_IntrAvail_RMASK 0x1F +#define QIB_6120_RcvCtrl_Reserved1_LSB 0x9 +#define QIB_6120_RcvCtrl_Reserved1_RMASK 0x7F +#define QIB_6120_RcvCtrl_Reserved2_LSB 0x5 +#define QIB_6120_RcvCtrl_Reserved2_RMASK 0xF +#define QIB_6120_RcvCtrl_PortEnable_LSB 0x0 +#define QIB_6120_RcvCtrl_PortEnable_RMASK 0x1F + +#define QIB_6120_RcvBTHQP_OFFS 0x108 +#define QIB_6120_RcvBTHQP_BTHQP_Mask_LSB 0x1E +#define QIB_6120_RcvBTHQP_BTHQP_Mask_RMASK 0x3 +#define QIB_6120_RcvBTHQP_Reserved_LSB 0x18 +#define QIB_6120_RcvBTHQP_Reserved_RMASK 0x3F +#define QIB_6120_RcvBTHQP_RcvBTHQP_LSB 0x0 +#define QIB_6120_RcvBTHQP_RcvBTHQP_RMASK 0xFFFFFF + +#define QIB_6120_RcvHdrSize_OFFS 0x110 + +#define QIB_6120_RcvHdrCnt_OFFS 0x118 + +#define QIB_6120_RcvHdrEntSize_OFFS 0x120 + +#define QIB_6120_RcvTIDBase_OFFS 0x128 + +#define QIB_6120_RcvTIDCnt_OFFS 0x130 + +#define QIB_6120_RcvEgrBase_OFFS 0x138 + +#define QIB_6120_RcvEgrCnt_OFFS 0x140 + +#define QIB_6120_RcvBufBase_OFFS 0x148 + +#define QIB_6120_RcvBufSize_OFFS 0x150 + +#define QIB_6120_RxIntMemBase_OFFS 0x158 + +#define QIB_6120_RxIntMemSize_OFFS 0x160 + +#define QIB_6120_RcvPartitionKey_OFFS 0x168 + +#define QIB_6120_RcvPktLEDCnt_OFFS 0x178 +#define QIB_6120_RcvPktLEDCnt_ONperiod_LSB 0x20 +#define QIB_6120_RcvPktLEDCnt_ONperiod_RMASK 0xFFFFFFFF +#define QIB_6120_RcvPktLEDCnt_OFFperiod_LSB 0x0 +#define QIB_6120_RcvPktLEDCnt_OFFperiod_RMASK 0xFFFFFFFF + +#define QIB_6120_SendCtrl_OFFS 0x1C0 +#define QIB_6120_SendCtrl_Disarm_LSB 0x1F +#define QIB_6120_SendCtrl_Disarm_RMASK 0x1 +#define QIB_6120_SendCtrl_Reserved_LSB 0x17 +#define QIB_6120_SendCtrl_Reserved_RMASK 0xFF +#define QIB_6120_SendCtrl_DisarmPIOBuf_LSB 0x10 +#define QIB_6120_SendCtrl_DisarmPIOBuf_RMASK 0x7F +#define QIB_6120_SendCtrl_Reserved1_LSB 0x4 +#define QIB_6120_SendCtrl_Reserved1_RMASK 0xFFF +#define QIB_6120_SendCtrl_PIOEnable_LSB 0x3 +#define QIB_6120_SendCtrl_PIOEnable_RMASK 0x1 +#define QIB_6120_SendCtrl_PIOBufAvailUpd_LSB 0x2 +#define QIB_6120_SendCtrl_PIOBufAvailUpd_RMASK 0x1 +#define QIB_6120_SendCtrl_PIOIntBufAvail_LSB 0x1 +#define QIB_6120_SendCtrl_PIOIntBufAvail_RMASK 0x1 +#define QIB_6120_SendCtrl_Abort_LSB 0x0 +#define QIB_6120_SendCtrl_Abort_RMASK 0x1 + +#define QIB_6120_SendPIOBufBase_OFFS 0x1C8 +#define QIB_6120_SendPIOBufBase_Reserved_LSB 0x35 +#define QIB_6120_SendPIOBufBase_Reserved_RMASK 0x7FF +#define QIB_6120_SendPIOBufBase_BaseAddr_LargePIO_LSB 0x20 +#define QIB_6120_SendPIOBufBase_BaseAddr_LargePIO_RMASK 0x1FFFFF +#define QIB_6120_SendPIOBufBase_Reserved1_LSB 0x15 +#define QIB_6120_SendPIOBufBase_Reserved1_RMASK 0x7FF +#define QIB_6120_SendPIOBufBase_BaseAddr_SmallPIO_LSB 0x0 +#define QIB_6120_SendPIOBufBase_BaseAddr_SmallPIO_RMASK 0x1FFFFF + +#define QIB_6120_SendPIOSize_OFFS 0x1D0 +#define QIB_6120_SendPIOSize_Reserved_LSB 0x2D +#define QIB_6120_SendPIOSize_Reserved_RMASK 0xFFFFF +#define QIB_6120_SendPIOSize_Size_LargePIO_LSB 0x20 +#define QIB_6120_SendPIOSize_Size_LargePIO_RMASK 0x1FFF +#define QIB_6120_SendPIOSize_Reserved1_LSB 0xC +#define QIB_6120_SendPIOSize_Reserved1_RMASK 0xFFFFF +#define QIB_6120_SendPIOSize_Size_SmallPIO_LSB 0x0 +#define QIB_6120_SendPIOSize_Size_SmallPIO_RMASK 0xFFF + +#define QIB_6120_SendPIOBufCnt_OFFS 0x1D8 +#define QIB_6120_SendPIOBufCnt_Reserved_LSB 0x24 +#define QIB_6120_SendPIOBufCnt_Reserved_RMASK 0xFFFFFFF +#define QIB_6120_SendPIOBufCnt_Num_LargePIO_LSB 0x20 +#define QIB_6120_SendPIOBufCnt_Num_LargePIO_RMASK 0xF +#define QIB_6120_SendPIOBufCnt_Reserved1_LSB 0x9 +#define QIB_6120_SendPIOBufCnt_Reserved1_RMASK 0x7FFFFF +#define QIB_6120_SendPIOBufCnt_Num_SmallPIO_LSB 0x0 +#define QIB_6120_SendPIOBufCnt_Num_SmallPIO_RMASK 0x1FF + +#define QIB_6120_SendPIOAvailAddr_OFFS 0x1E0 +#define QIB_6120_SendPIOAvailAddr_SendPIOAvailAddr_LSB 0x6 +#define QIB_6120_SendPIOAvailAddr_SendPIOAvailAddr_RMASK 0x3FFFFFFFF +#define QIB_6120_SendPIOAvailAddr_Reserved_LSB 0x0 +#define QIB_6120_SendPIOAvailAddr_Reserved_RMASK 0x3F + +#define QIB_6120_SendBufErr0_OFFS 0x240 +#define QIB_6120_SendBufErr0_SendBufErrPIO_63_0_LSB 0x0 +#define QIB_6120_SendBufErr0_SendBufErrPIO_63_0_RMASK 0x0 + +#define QIB_6120_RcvHdrAddr0_OFFS 0x280 +#define QIB_6120_RcvHdrAddr0_RcvHdrAddr0_LSB 0x2 +#define QIB_6120_RcvHdrAddr0_RcvHdrAddr0_RMASK 0x3FFFFFFFFF +#define QIB_6120_RcvHdrAddr0_Reserved_LSB 0x0 +#define QIB_6120_RcvHdrAddr0_Reserved_RMASK 0x3 + +#define QIB_6120_RcvHdrTailAddr0_OFFS 0x300 +#define QIB_6120_RcvHdrTailAddr0_RcvHdrTailAddr0_LSB 0x2 +#define QIB_6120_RcvHdrTailAddr0_RcvHdrTailAddr0_RMASK 0x3FFFFFFFFF +#define QIB_6120_RcvHdrTailAddr0_Reserved_LSB 0x0 +#define QIB_6120_RcvHdrTailAddr0_Reserved_RMASK 0x3 + +#define QIB_6120_SerdesCfg0_OFFS 0x3C0 +#define QIB_6120_SerdesCfg0_DisableIBTxIdleDetect_LSB 0x3F +#define QIB_6120_SerdesCfg0_DisableIBTxIdleDetect_RMASK 0x1 +#define QIB_6120_SerdesCfg0_Reserved_LSB 0x38 +#define QIB_6120_SerdesCfg0_Reserved_RMASK 0x7F +#define QIB_6120_SerdesCfg0_RxEqCtl_LSB 0x36 +#define QIB_6120_SerdesCfg0_RxEqCtl_RMASK 0x3 +#define QIB_6120_SerdesCfg0_TxTermAdj_LSB 0x34 +#define QIB_6120_SerdesCfg0_TxTermAdj_RMASK 0x3 +#define QIB_6120_SerdesCfg0_RxTermAdj_LSB 0x32 +#define QIB_6120_SerdesCfg0_RxTermAdj_RMASK 0x3 +#define QIB_6120_SerdesCfg0_TermAdj1_LSB 0x31 +#define QIB_6120_SerdesCfg0_TermAdj1_RMASK 0x1 +#define QIB_6120_SerdesCfg0_TermAdj0_LSB 0x30 +#define QIB_6120_SerdesCfg0_TermAdj0_RMASK 0x1 +#define QIB_6120_SerdesCfg0_LPBKA_LSB 0x2F +#define QIB_6120_SerdesCfg0_LPBKA_RMASK 0x1 +#define QIB_6120_SerdesCfg0_LPBKB_LSB 0x2E +#define QIB_6120_SerdesCfg0_LPBKB_RMASK 0x1 +#define QIB_6120_SerdesCfg0_LPBKC_LSB 0x2D +#define QIB_6120_SerdesCfg0_LPBKC_RMASK 0x1 +#define QIB_6120_SerdesCfg0_LPBKD_LSB 0x2C +#define QIB_6120_SerdesCfg0_LPBKD_RMASK 0x1 +#define QIB_6120_SerdesCfg0_PW_LSB 0x2B +#define QIB_6120_SerdesCfg0_PW_RMASK 0x1 +#define QIB_6120_SerdesCfg0_RefSel_LSB 0x29 +#define QIB_6120_SerdesCfg0_RefSel_RMASK 0x3 +#define QIB_6120_SerdesCfg0_ParReset_LSB 0x28 +#define QIB_6120_SerdesCfg0_ParReset_RMASK 0x1 +#define QIB_6120_SerdesCfg0_ParLPBK_LSB 0x27 +#define QIB_6120_SerdesCfg0_ParLPBK_RMASK 0x1 +#define QIB_6120_SerdesCfg0_OffsetEn_LSB 0x26 +#define QIB_6120_SerdesCfg0_OffsetEn_RMASK 0x1 +#define QIB_6120_SerdesCfg0_Offset_LSB 0x1E +#define QIB_6120_SerdesCfg0_Offset_RMASK 0xFF +#define QIB_6120_SerdesCfg0_L2PwrDn_LSB 0x1D +#define QIB_6120_SerdesCfg0_L2PwrDn_RMASK 0x1 +#define QIB_6120_SerdesCfg0_ResetPLL_LSB 0x1C +#define QIB_6120_SerdesCfg0_ResetPLL_RMASK 0x1 +#define QIB_6120_SerdesCfg0_RxTermEnX_LSB 0x18 +#define QIB_6120_SerdesCfg0_RxTermEnX_RMASK 0xF +#define QIB_6120_SerdesCfg0_BeaconTxEnX_LSB 0x14 +#define QIB_6120_SerdesCfg0_BeaconTxEnX_RMASK 0xF +#define QIB_6120_SerdesCfg0_RxDetEnX_LSB 0x10 +#define QIB_6120_SerdesCfg0_RxDetEnX_RMASK 0xF +#define QIB_6120_SerdesCfg0_TxIdeEnX_LSB 0xC +#define QIB_6120_SerdesCfg0_TxIdeEnX_RMASK 0xF +#define QIB_6120_SerdesCfg0_RxIdleEnX_LSB 0x8 +#define QIB_6120_SerdesCfg0_RxIdleEnX_RMASK 0xF +#define QIB_6120_SerdesCfg0_L1PwrDnA_LSB 0x7 +#define QIB_6120_SerdesCfg0_L1PwrDnA_RMASK 0x1 +#define QIB_6120_SerdesCfg0_L1PwrDnB_LSB 0x6 +#define QIB_6120_SerdesCfg0_L1PwrDnB_RMASK 0x1 +#define QIB_6120_SerdesCfg0_L1PwrDnC_LSB 0x5 +#define QIB_6120_SerdesCfg0_L1PwrDnC_RMASK 0x1 +#define QIB_6120_SerdesCfg0_L1PwrDnD_LSB 0x4 +#define QIB_6120_SerdesCfg0_L1PwrDnD_RMASK 0x1 +#define QIB_6120_SerdesCfg0_ResetA_LSB 0x3 +#define QIB_6120_SerdesCfg0_ResetA_RMASK 0x1 +#define QIB_6120_SerdesCfg0_ResetB_LSB 0x2 +#define QIB_6120_SerdesCfg0_ResetB_RMASK 0x1 +#define QIB_6120_SerdesCfg0_ResetC_LSB 0x1 +#define QIB_6120_SerdesCfg0_ResetC_RMASK 0x1 +#define QIB_6120_SerdesCfg0_ResetD_LSB 0x0 +#define QIB_6120_SerdesCfg0_ResetD_RMASK 0x1 + +#define QIB_6120_SerdesStat_OFFS 0x3D0 +#define QIB_6120_SerdesStat_Reserved_LSB 0xC +#define QIB_6120_SerdesStat_Reserved_RMASK 0xFFFFFFFFFFFFF +#define QIB_6120_SerdesStat_BeaconDetA_LSB 0xB +#define QIB_6120_SerdesStat_BeaconDetA_RMASK 0x1 +#define QIB_6120_SerdesStat_BeaconDetB_LSB 0xA +#define QIB_6120_SerdesStat_BeaconDetB_RMASK 0x1 +#define QIB_6120_SerdesStat_BeaconDetC_LSB 0x9 +#define QIB_6120_SerdesStat_BeaconDetC_RMASK 0x1 +#define QIB_6120_SerdesStat_BeaconDetD_LSB 0x8 +#define QIB_6120_SerdesStat_BeaconDetD_RMASK 0x1 +#define QIB_6120_SerdesStat_RxDetA_LSB 0x7 +#define QIB_6120_SerdesStat_RxDetA_RMASK 0x1 +#define QIB_6120_SerdesStat_RxDetB_LSB 0x6 +#define QIB_6120_SerdesStat_RxDetB_RMASK 0x1 +#define QIB_6120_SerdesStat_RxDetC_LSB 0x5 +#define QIB_6120_SerdesStat_RxDetC_RMASK 0x1 +#define QIB_6120_SerdesStat_RxDetD_LSB 0x4 +#define QIB_6120_SerdesStat_RxDetD_RMASK 0x1 +#define QIB_6120_SerdesStat_TxIdleDetA_LSB 0x3 +#define QIB_6120_SerdesStat_TxIdleDetA_RMASK 0x1 +#define QIB_6120_SerdesStat_TxIdleDetB_LSB 0x2 +#define QIB_6120_SerdesStat_TxIdleDetB_RMASK 0x1 +#define QIB_6120_SerdesStat_TxIdleDetC_LSB 0x1 +#define QIB_6120_SerdesStat_TxIdleDetC_RMASK 0x1 +#define QIB_6120_SerdesStat_TxIdleDetD_LSB 0x0 +#define QIB_6120_SerdesStat_TxIdleDetD_RMASK 0x1 + +#define QIB_6120_XGXSCfg_OFFS 0x3D8 +#define QIB_6120_XGXSCfg_ArmLaunchErrorDisable_LSB 0x3F +#define QIB_6120_XGXSCfg_ArmLaunchErrorDisable_RMASK 0x1 +#define QIB_6120_XGXSCfg_Reserved_LSB 0x17 +#define QIB_6120_XGXSCfg_Reserved_RMASK 0xFFFFFFFFFF +#define QIB_6120_XGXSCfg_polarity_inv_LSB 0x13 +#define QIB_6120_XGXSCfg_polarity_inv_RMASK 0xF +#define QIB_6120_XGXSCfg_link_sync_mask_LSB 0x9 +#define QIB_6120_XGXSCfg_link_sync_mask_RMASK 0x3FF +#define QIB_6120_XGXSCfg_port_addr_LSB 0x4 +#define QIB_6120_XGXSCfg_port_addr_RMASK 0x1F +#define QIB_6120_XGXSCfg_mdd_30_LSB 0x3 +#define QIB_6120_XGXSCfg_mdd_30_RMASK 0x1 +#define QIB_6120_XGXSCfg_xcv_resetn_LSB 0x2 +#define QIB_6120_XGXSCfg_xcv_resetn_RMASK 0x1 +#define QIB_6120_XGXSCfg_Reserved1_LSB 0x1 +#define QIB_6120_XGXSCfg_Reserved1_RMASK 0x1 +#define QIB_6120_XGXSCfg_tx_rx_resetn_LSB 0x0 +#define QIB_6120_XGXSCfg_tx_rx_resetn_RMASK 0x1 + +#define QIB_6120_LBIntCnt_OFFS 0x12000 + +#define QIB_6120_LBFlowStallCnt_OFFS 0x12008 + +#define QIB_6120_TxUnsupVLErrCnt_OFFS 0x12018 + +#define QIB_6120_TxDataPktCnt_OFFS 0x12020 + +#define QIB_6120_TxFlowPktCnt_OFFS 0x12028 + +#define QIB_6120_TxDwordCnt_OFFS 0x12030 + +#define QIB_6120_TxLenErrCnt_OFFS 0x12038 + +#define QIB_6120_TxMaxMinLenErrCnt_OFFS 0x12040 + +#define QIB_6120_TxUnderrunCnt_OFFS 0x12048 + +#define QIB_6120_TxFlowStallCnt_OFFS 0x12050 + +#define QIB_6120_TxDroppedPktCnt_OFFS 0x12058 + +#define QIB_6120_RxDroppedPktCnt_OFFS 0x12060 + +#define QIB_6120_RxDataPktCnt_OFFS 0x12068 + +#define QIB_6120_RxFlowPktCnt_OFFS 0x12070 + +#define QIB_6120_RxDwordCnt_OFFS 0x12078 + +#define QIB_6120_RxLenErrCnt_OFFS 0x12080 + +#define QIB_6120_RxMaxMinLenErrCnt_OFFS 0x12088 + +#define QIB_6120_RxICRCErrCnt_OFFS 0x12090 + +#define QIB_6120_RxVCRCErrCnt_OFFS 0x12098 + +#define QIB_6120_RxFlowCtrlErrCnt_OFFS 0x120A0 + +#define QIB_6120_RxBadFormatCnt_OFFS 0x120A8 + +#define QIB_6120_RxLinkProblemCnt_OFFS 0x120B0 + +#define QIB_6120_RxEBPCnt_OFFS 0x120B8 + +#define QIB_6120_RxLPCRCErrCnt_OFFS 0x120C0 + +#define QIB_6120_RxBufOvflCnt_OFFS 0x120C8 + +#define QIB_6120_RxTIDFullErrCnt_OFFS 0x120D0 + +#define QIB_6120_RxTIDValidErrCnt_OFFS 0x120D8 + +#define QIB_6120_RxPKeyMismatchCnt_OFFS 0x120E0 + +#define QIB_6120_RxP0HdrEgrOvflCnt_OFFS 0x120E8 + +#define QIB_6120_IBStatusChangeCnt_OFFS 0x12140 + +#define QIB_6120_IBLinkErrRecoveryCnt_OFFS 0x12148 + +#define QIB_6120_IBLinkDownedCnt_OFFS 0x12150 + +#define QIB_6120_IBSymbolErrCnt_OFFS 0x12158 + +#define QIB_6120_PcieRetryBufDiagQwordCnt_OFFS 0x12170 + +#define QIB_6120_RcvEgrArray0_OFFS 0x14000 + +#define QIB_6120_RcvTIDArray0_OFFS 0x54000 + +#define QIB_6120_PIOLaunchFIFO_OFFS 0x64000 + +#define QIB_6120_SendPIOpbcCache_OFFS 0x64800 + +#define QIB_6120_RcvBuf1_OFFS 0x72000 + +#define QIB_6120_RcvBuf2_OFFS 0x75000 + +#define QIB_6120_RcvFlags_OFFS 0x77000 + +#define QIB_6120_RcvLookupBuf1_OFFS 0x79000 + +#define QIB_6120_RcvDMABuf_OFFS 0x7B000 + +#define QIB_6120_MiscRXEIntMem_OFFS 0x7C000 + +#define QIB_6120_PCIERcvBuf_OFFS 0x80000 + +#define QIB_6120_PCIERetryBuf_OFFS 0x82000 + +#define QIB_6120_PCIERcvBufRdToWrAddr_OFFS 0x84000 + +#define QIB_6120_PIOBuf0_MA_OFFS 0x100000 diff --git a/drivers/infiniband/hw/qib/qib_7220.h b/drivers/infiniband/hw/qib/qib_7220.h new file mode 100644 index 00000000..a5356cb4 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_7220.h @@ -0,0 +1,149 @@ +#ifndef _QIB_7220_H +#define _QIB_7220_H +/* + * Copyright (c) 2007, 2009, 2010 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* grab register-defs auto-generated by HW */ +#include "qib_7220_regs.h" + +/* The number of eager receive TIDs for context zero. */ +#define IBA7220_KRCVEGRCNT 2048U + +#define IB_7220_LT_STATE_CFGRCVFCFG 0x09 +#define IB_7220_LT_STATE_CFGWAITRMT 0x0a +#define IB_7220_LT_STATE_TXREVLANES 0x0d +#define IB_7220_LT_STATE_CFGENH 0x10 + +struct qib_chip_specific { + u64 __iomem *cregbase; + u64 *cntrs; + u64 *portcntrs; + spinlock_t sdepb_lock; /* serdes EPB bus */ + spinlock_t rcvmod_lock; /* protect rcvctrl shadow changes */ + spinlock_t gpio_lock; /* RMW of shadows/regs for ExtCtrl and GPIO */ + u64 hwerrmask; + u64 errormask; + u64 gpio_out; /* shadow of kr_gpio_out, for rmw ops */ + u64 gpio_mask; /* shadow the gpio mask register */ + u64 extctrl; /* shadow the gpio output enable, etc... */ + u32 ncntrs; + u32 nportcntrs; + u32 cntrnamelen; + u32 portcntrnamelen; + u32 numctxts; + u32 rcvegrcnt; + u32 autoneg_tries; + u32 serdes_first_init_done; + u32 sdmabufcnt; + u32 lastbuf_for_pio; + u32 updthresh; /* current AvailUpdThld */ + u32 updthresh_dflt; /* default AvailUpdThld */ + int irq; + u8 presets_needed; + u8 relock_timer_active; + char emsgbuf[128]; + char sdmamsgbuf[192]; + char bitsmsgbuf[64]; + struct timer_list relock_timer; + unsigned int relock_interval; /* in jiffies */ +}; + +struct qib_chippport_specific { + struct qib_pportdata pportdata; + wait_queue_head_t autoneg_wait; + struct delayed_work autoneg_work; + struct timer_list chase_timer; + /* + * these 5 fields are used to establish deltas for IB symbol + * errors and linkrecovery errors. They can be reported on + * some chips during link negotiation prior to INIT, and with + * DDR when faking DDR negotiations with non-IBTA switches. + * The chip counters are adjusted at driver unload if there is + * a non-zero delta. + */ + u64 ibdeltainprog; + u64 ibsymdelta; + u64 ibsymsnap; + u64 iblnkerrdelta; + u64 iblnkerrsnap; + u64 ibcctrl; /* kr_ibcctrl shadow */ + u64 ibcddrctrl; /* kr_ibcddrctrl shadow */ + unsigned long chase_end; + u32 last_delay_mult; +}; + +/* + * This header file provides the declarations and common definitions + * for (mostly) manipulation of the SerDes blocks within the IBA7220. + * the functions declared should only be called from within other + * 7220-related files such as qib_iba7220.c or qib_sd7220.c. + */ +int qib_sd7220_presets(struct qib_devdata *dd); +int qib_sd7220_init(struct qib_devdata *dd); +void qib_sd7220_clr_ibpar(struct qib_devdata *); +/* + * Below used for sdnum parameter, selecting one of the two sections + * used for PCIe, or the single SerDes used for IB, which is the + * only one currently used + */ +#define IB_7220_SERDES 2 + +static inline u32 qib_read_kreg32(const struct qib_devdata *dd, + const u16 regno) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return -1; + return readl((u32 __iomem *)&dd->kregbase[regno]); +} + +static inline u64 qib_read_kreg64(const struct qib_devdata *dd, + const u16 regno) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return -1; + + return readq(&dd->kregbase[regno]); +} + +static inline void qib_write_kreg(const struct qib_devdata *dd, + const u16 regno, u64 value) +{ + if (dd->kregbase) + writeq(value, &dd->kregbase[regno]); +} + +void set_7220_relock_poll(struct qib_devdata *, int); +void shutdown_7220_relock_poll(struct qib_devdata *); +void toggle_7220_rclkrls(struct qib_devdata *); + + +#endif /* _QIB_7220_H */ diff --git a/drivers/infiniband/hw/qib/qib_7220_regs.h b/drivers/infiniband/hw/qib/qib_7220_regs.h new file mode 100644 index 00000000..0da5bb75 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_7220_regs.h @@ -0,0 +1,1496 @@ +/* + * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* This file is mechanically generated from RTL. Any hand-edits will be lost! */ + +#define QIB_7220_Revision_OFFS 0x0 +#define QIB_7220_Revision_R_Simulator_LSB 0x3F +#define QIB_7220_Revision_R_Simulator_RMASK 0x1 +#define QIB_7220_Revision_R_Emulation_LSB 0x3E +#define QIB_7220_Revision_R_Emulation_RMASK 0x1 +#define QIB_7220_Revision_R_Emulation_Revcode_LSB 0x28 +#define QIB_7220_Revision_R_Emulation_Revcode_RMASK 0x3FFFFF +#define QIB_7220_Revision_BoardID_LSB 0x20 +#define QIB_7220_Revision_BoardID_RMASK 0xFF +#define QIB_7220_Revision_R_SW_LSB 0x18 +#define QIB_7220_Revision_R_SW_RMASK 0xFF +#define QIB_7220_Revision_R_Arch_LSB 0x10 +#define QIB_7220_Revision_R_Arch_RMASK 0xFF +#define QIB_7220_Revision_R_ChipRevMajor_LSB 0x8 +#define QIB_7220_Revision_R_ChipRevMajor_RMASK 0xFF +#define QIB_7220_Revision_R_ChipRevMinor_LSB 0x0 +#define QIB_7220_Revision_R_ChipRevMinor_RMASK 0xFF + +#define QIB_7220_Control_OFFS 0x8 +#define QIB_7220_Control_SyncResetExceptPcieIRAMRST_LSB 0x7 +#define QIB_7220_Control_SyncResetExceptPcieIRAMRST_RMASK 0x1 +#define QIB_7220_Control_PCIECplQDiagEn_LSB 0x6 +#define QIB_7220_Control_PCIECplQDiagEn_RMASK 0x1 +#define QIB_7220_Control_Reserved_LSB 0x5 +#define QIB_7220_Control_Reserved_RMASK 0x1 +#define QIB_7220_Control_TxLatency_LSB 0x4 +#define QIB_7220_Control_TxLatency_RMASK 0x1 +#define QIB_7220_Control_PCIERetryBufDiagEn_LSB 0x3 +#define QIB_7220_Control_PCIERetryBufDiagEn_RMASK 0x1 +#define QIB_7220_Control_LinkEn_LSB 0x2 +#define QIB_7220_Control_LinkEn_RMASK 0x1 +#define QIB_7220_Control_FreezeMode_LSB 0x1 +#define QIB_7220_Control_FreezeMode_RMASK 0x1 +#define QIB_7220_Control_SyncReset_LSB 0x0 +#define QIB_7220_Control_SyncReset_RMASK 0x1 + +#define QIB_7220_PageAlign_OFFS 0x10 + +#define QIB_7220_PortCnt_OFFS 0x18 + +#define QIB_7220_SendRegBase_OFFS 0x30 + +#define QIB_7220_UserRegBase_OFFS 0x38 + +#define QIB_7220_CntrRegBase_OFFS 0x40 + +#define QIB_7220_Scratch_OFFS 0x48 + +#define QIB_7220_IntMask_OFFS 0x68 +#define QIB_7220_IntMask_SDmaIntMask_LSB 0x3F +#define QIB_7220_IntMask_SDmaIntMask_RMASK 0x1 +#define QIB_7220_IntMask_SDmaDisabledMasked_LSB 0x3E +#define QIB_7220_IntMask_SDmaDisabledMasked_RMASK 0x1 +#define QIB_7220_IntMask_Reserved_LSB 0x31 +#define QIB_7220_IntMask_Reserved_RMASK 0x1FFF +#define QIB_7220_IntMask_RcvUrg16IntMask_LSB 0x30 +#define QIB_7220_IntMask_RcvUrg16IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg15IntMask_LSB 0x2F +#define QIB_7220_IntMask_RcvUrg15IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg14IntMask_LSB 0x2E +#define QIB_7220_IntMask_RcvUrg14IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg13IntMask_LSB 0x2D +#define QIB_7220_IntMask_RcvUrg13IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg12IntMask_LSB 0x2C +#define QIB_7220_IntMask_RcvUrg12IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg11IntMask_LSB 0x2B +#define QIB_7220_IntMask_RcvUrg11IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg10IntMask_LSB 0x2A +#define QIB_7220_IntMask_RcvUrg10IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg9IntMask_LSB 0x29 +#define QIB_7220_IntMask_RcvUrg9IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg8IntMask_LSB 0x28 +#define QIB_7220_IntMask_RcvUrg8IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg7IntMask_LSB 0x27 +#define QIB_7220_IntMask_RcvUrg7IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg6IntMask_LSB 0x26 +#define QIB_7220_IntMask_RcvUrg6IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg5IntMask_LSB 0x25 +#define QIB_7220_IntMask_RcvUrg5IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg4IntMask_LSB 0x24 +#define QIB_7220_IntMask_RcvUrg4IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg3IntMask_LSB 0x23 +#define QIB_7220_IntMask_RcvUrg3IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg2IntMask_LSB 0x22 +#define QIB_7220_IntMask_RcvUrg2IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg1IntMask_LSB 0x21 +#define QIB_7220_IntMask_RcvUrg1IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg0IntMask_LSB 0x20 +#define QIB_7220_IntMask_RcvUrg0IntMask_RMASK 0x1 +#define QIB_7220_IntMask_ErrorIntMask_LSB 0x1F +#define QIB_7220_IntMask_ErrorIntMask_RMASK 0x1 +#define QIB_7220_IntMask_PioSetIntMask_LSB 0x1E +#define QIB_7220_IntMask_PioSetIntMask_RMASK 0x1 +#define QIB_7220_IntMask_PioBufAvailIntMask_LSB 0x1D +#define QIB_7220_IntMask_PioBufAvailIntMask_RMASK 0x1 +#define QIB_7220_IntMask_assertGPIOIntMask_LSB 0x1C +#define QIB_7220_IntMask_assertGPIOIntMask_RMASK 0x1 +#define QIB_7220_IntMask_IBSerdesTrimDoneIntMask_LSB 0x1B +#define QIB_7220_IntMask_IBSerdesTrimDoneIntMask_RMASK 0x1 +#define QIB_7220_IntMask_JIntMask_LSB 0x1A +#define QIB_7220_IntMask_JIntMask_RMASK 0x1 +#define QIB_7220_IntMask_Reserved1_LSB 0x11 +#define QIB_7220_IntMask_Reserved1_RMASK 0x1FF +#define QIB_7220_IntMask_RcvAvail16IntMask_LSB 0x10 +#define QIB_7220_IntMask_RcvAvail16IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail15IntMask_LSB 0xF +#define QIB_7220_IntMask_RcvAvail15IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail14IntMask_LSB 0xE +#define QIB_7220_IntMask_RcvAvail14IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail13IntMask_LSB 0xD +#define QIB_7220_IntMask_RcvAvail13IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail12IntMask_LSB 0xC +#define QIB_7220_IntMask_RcvAvail12IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail11IntMask_LSB 0xB +#define QIB_7220_IntMask_RcvAvail11IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail10IntMask_LSB 0xA +#define QIB_7220_IntMask_RcvAvail10IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail9IntMask_LSB 0x9 +#define QIB_7220_IntMask_RcvAvail9IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail8IntMask_LSB 0x8 +#define QIB_7220_IntMask_RcvAvail8IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail7IntMask_LSB 0x7 +#define QIB_7220_IntMask_RcvAvail7IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail6IntMask_LSB 0x6 +#define QIB_7220_IntMask_RcvAvail6IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail5IntMask_LSB 0x5 +#define QIB_7220_IntMask_RcvAvail5IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail4IntMask_LSB 0x4 +#define QIB_7220_IntMask_RcvAvail4IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail3IntMask_LSB 0x3 +#define QIB_7220_IntMask_RcvAvail3IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail2IntMask_LSB 0x2 +#define QIB_7220_IntMask_RcvAvail2IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail1IntMask_LSB 0x1 +#define QIB_7220_IntMask_RcvAvail1IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail0IntMask_LSB 0x0 +#define QIB_7220_IntMask_RcvAvail0IntMask_RMASK 0x1 + +#define QIB_7220_IntStatus_OFFS 0x70 +#define QIB_7220_IntStatus_SDmaInt_LSB 0x3F +#define QIB_7220_IntStatus_SDmaInt_RMASK 0x1 +#define QIB_7220_IntStatus_SDmaDisabled_LSB 0x3E +#define QIB_7220_IntStatus_SDmaDisabled_RMASK 0x1 +#define QIB_7220_IntStatus_Reserved_LSB 0x31 +#define QIB_7220_IntStatus_Reserved_RMASK 0x1FFF +#define QIB_7220_IntStatus_RcvUrg16_LSB 0x30 +#define QIB_7220_IntStatus_RcvUrg16_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg15_LSB 0x2F +#define QIB_7220_IntStatus_RcvUrg15_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg14_LSB 0x2E +#define QIB_7220_IntStatus_RcvUrg14_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg13_LSB 0x2D +#define QIB_7220_IntStatus_RcvUrg13_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg12_LSB 0x2C +#define QIB_7220_IntStatus_RcvUrg12_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg11_LSB 0x2B +#define QIB_7220_IntStatus_RcvUrg11_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg10_LSB 0x2A +#define QIB_7220_IntStatus_RcvUrg10_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg9_LSB 0x29 +#define QIB_7220_IntStatus_RcvUrg9_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg8_LSB 0x28 +#define QIB_7220_IntStatus_RcvUrg8_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg7_LSB 0x27 +#define QIB_7220_IntStatus_RcvUrg7_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg6_LSB 0x26 +#define QIB_7220_IntStatus_RcvUrg6_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg5_LSB 0x25 +#define QIB_7220_IntStatus_RcvUrg5_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg4_LSB 0x24 +#define QIB_7220_IntStatus_RcvUrg4_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg3_LSB 0x23 +#define QIB_7220_IntStatus_RcvUrg3_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg2_LSB 0x22 +#define QIB_7220_IntStatus_RcvUrg2_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg1_LSB 0x21 +#define QIB_7220_IntStatus_RcvUrg1_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg0_LSB 0x20 +#define QIB_7220_IntStatus_RcvUrg0_RMASK 0x1 +#define QIB_7220_IntStatus_Error_LSB 0x1F +#define QIB_7220_IntStatus_Error_RMASK 0x1 +#define QIB_7220_IntStatus_PioSent_LSB 0x1E +#define QIB_7220_IntStatus_PioSent_RMASK 0x1 +#define QIB_7220_IntStatus_PioBufAvail_LSB 0x1D +#define QIB_7220_IntStatus_PioBufAvail_RMASK 0x1 +#define QIB_7220_IntStatus_assertGPIO_LSB 0x1C +#define QIB_7220_IntStatus_assertGPIO_RMASK 0x1 +#define QIB_7220_IntStatus_IBSerdesTrimDone_LSB 0x1B +#define QIB_7220_IntStatus_IBSerdesTrimDone_RMASK 0x1 +#define QIB_7220_IntStatus_JInt_LSB 0x1A +#define QIB_7220_IntStatus_JInt_RMASK 0x1 +#define QIB_7220_IntStatus_Reserved1_LSB 0x11 +#define QIB_7220_IntStatus_Reserved1_RMASK 0x1FF +#define QIB_7220_IntStatus_RcvAvail16_LSB 0x10 +#define QIB_7220_IntStatus_RcvAvail16_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail15_LSB 0xF +#define QIB_7220_IntStatus_RcvAvail15_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail14_LSB 0xE +#define QIB_7220_IntStatus_RcvAvail14_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail13_LSB 0xD +#define QIB_7220_IntStatus_RcvAvail13_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail12_LSB 0xC +#define QIB_7220_IntStatus_RcvAvail12_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail11_LSB 0xB +#define QIB_7220_IntStatus_RcvAvail11_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail10_LSB 0xA +#define QIB_7220_IntStatus_RcvAvail10_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail9_LSB 0x9 +#define QIB_7220_IntStatus_RcvAvail9_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail8_LSB 0x8 +#define QIB_7220_IntStatus_RcvAvail8_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail7_LSB 0x7 +#define QIB_7220_IntStatus_RcvAvail7_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail6_LSB 0x6 +#define QIB_7220_IntStatus_RcvAvail6_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail5_LSB 0x5 +#define QIB_7220_IntStatus_RcvAvail5_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail4_LSB 0x4 +#define QIB_7220_IntStatus_RcvAvail4_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail3_LSB 0x3 +#define QIB_7220_IntStatus_RcvAvail3_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail2_LSB 0x2 +#define QIB_7220_IntStatus_RcvAvail2_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail1_LSB 0x1 +#define QIB_7220_IntStatus_RcvAvail1_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail0_LSB 0x0 +#define QIB_7220_IntStatus_RcvAvail0_RMASK 0x1 + +#define QIB_7220_IntClear_OFFS 0x78 +#define QIB_7220_IntClear_SDmaIntClear_LSB 0x3F +#define QIB_7220_IntClear_SDmaIntClear_RMASK 0x1 +#define QIB_7220_IntClear_SDmaDisabledClear_LSB 0x3E +#define QIB_7220_IntClear_SDmaDisabledClear_RMASK 0x1 +#define QIB_7220_IntClear_Reserved_LSB 0x31 +#define QIB_7220_IntClear_Reserved_RMASK 0x1FFF +#define QIB_7220_IntClear_RcvUrg16IntClear_LSB 0x30 +#define QIB_7220_IntClear_RcvUrg16IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg15IntClear_LSB 0x2F +#define QIB_7220_IntClear_RcvUrg15IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg14IntClear_LSB 0x2E +#define QIB_7220_IntClear_RcvUrg14IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg13IntClear_LSB 0x2D +#define QIB_7220_IntClear_RcvUrg13IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg12IntClear_LSB 0x2C +#define QIB_7220_IntClear_RcvUrg12IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg11IntClear_LSB 0x2B +#define QIB_7220_IntClear_RcvUrg11IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg10IntClear_LSB 0x2A +#define QIB_7220_IntClear_RcvUrg10IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg9IntClear_LSB 0x29 +#define QIB_7220_IntClear_RcvUrg9IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg8IntClear_LSB 0x28 +#define QIB_7220_IntClear_RcvUrg8IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg7IntClear_LSB 0x27 +#define QIB_7220_IntClear_RcvUrg7IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg6IntClear_LSB 0x26 +#define QIB_7220_IntClear_RcvUrg6IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg5IntClear_LSB 0x25 +#define QIB_7220_IntClear_RcvUrg5IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg4IntClear_LSB 0x24 +#define QIB_7220_IntClear_RcvUrg4IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg3IntClear_LSB 0x23 +#define QIB_7220_IntClear_RcvUrg3IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg2IntClear_LSB 0x22 +#define QIB_7220_IntClear_RcvUrg2IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg1IntClear_LSB 0x21 +#define QIB_7220_IntClear_RcvUrg1IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg0IntClear_LSB 0x20 +#define QIB_7220_IntClear_RcvUrg0IntClear_RMASK 0x1 +#define QIB_7220_IntClear_ErrorIntClear_LSB 0x1F +#define QIB_7220_IntClear_ErrorIntClear_RMASK 0x1 +#define QIB_7220_IntClear_PioSetIntClear_LSB 0x1E +#define QIB_7220_IntClear_PioSetIntClear_RMASK 0x1 +#define QIB_7220_IntClear_PioBufAvailIntClear_LSB 0x1D +#define QIB_7220_IntClear_PioBufAvailIntClear_RMASK 0x1 +#define QIB_7220_IntClear_assertGPIOIntClear_LSB 0x1C +#define QIB_7220_IntClear_assertGPIOIntClear_RMASK 0x1 +#define QIB_7220_IntClear_IBSerdesTrimDoneClear_LSB 0x1B +#define QIB_7220_IntClear_IBSerdesTrimDoneClear_RMASK 0x1 +#define QIB_7220_IntClear_JIntClear_LSB 0x1A +#define QIB_7220_IntClear_JIntClear_RMASK 0x1 +#define QIB_7220_IntClear_Reserved1_LSB 0x11 +#define QIB_7220_IntClear_Reserved1_RMASK 0x1FF +#define QIB_7220_IntClear_RcvAvail16IntClear_LSB 0x10 +#define QIB_7220_IntClear_RcvAvail16IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail15IntClear_LSB 0xF +#define QIB_7220_IntClear_RcvAvail15IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail14IntClear_LSB 0xE +#define QIB_7220_IntClear_RcvAvail14IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail13IntClear_LSB 0xD +#define QIB_7220_IntClear_RcvAvail13IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail12IntClear_LSB 0xC +#define QIB_7220_IntClear_RcvAvail12IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail11IntClear_LSB 0xB +#define QIB_7220_IntClear_RcvAvail11IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail10IntClear_LSB 0xA +#define QIB_7220_IntClear_RcvAvail10IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail9IntClear_LSB 0x9 +#define QIB_7220_IntClear_RcvAvail9IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail8IntClear_LSB 0x8 +#define QIB_7220_IntClear_RcvAvail8IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail7IntClear_LSB 0x7 +#define QIB_7220_IntClear_RcvAvail7IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail6IntClear_LSB 0x6 +#define QIB_7220_IntClear_RcvAvail6IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail5IntClear_LSB 0x5 +#define QIB_7220_IntClear_RcvAvail5IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail4IntClear_LSB 0x4 +#define QIB_7220_IntClear_RcvAvail4IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail3IntClear_LSB 0x3 +#define QIB_7220_IntClear_RcvAvail3IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail2IntClear_LSB 0x2 +#define QIB_7220_IntClear_RcvAvail2IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail1IntClear_LSB 0x1 +#define QIB_7220_IntClear_RcvAvail1IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail0IntClear_LSB 0x0 +#define QIB_7220_IntClear_RcvAvail0IntClear_RMASK 0x1 + +#define QIB_7220_ErrMask_OFFS 0x80 +#define QIB_7220_ErrMask_Reserved_LSB 0x36 +#define QIB_7220_ErrMask_Reserved_RMASK 0x3FF +#define QIB_7220_ErrMask_InvalidEEPCmdMask_LSB 0x35 +#define QIB_7220_ErrMask_InvalidEEPCmdMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaDescAddrMisalignErrMask_LSB 0x34 +#define QIB_7220_ErrMask_SDmaDescAddrMisalignErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_HardwareErrMask_LSB 0x33 +#define QIB_7220_ErrMask_HardwareErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_ResetNegatedMask_LSB 0x32 +#define QIB_7220_ErrMask_ResetNegatedMask_RMASK 0x1 +#define QIB_7220_ErrMask_InvalidAddrErrMask_LSB 0x31 +#define QIB_7220_ErrMask_InvalidAddrErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_IBStatusChangedMask_LSB 0x30 +#define QIB_7220_ErrMask_IBStatusChangedMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaUnexpDataErrMask_LSB 0x2F +#define QIB_7220_ErrMask_SDmaUnexpDataErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaMissingDwErrMask_LSB 0x2E +#define QIB_7220_ErrMask_SDmaMissingDwErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaDwEnErrMask_LSB 0x2D +#define QIB_7220_ErrMask_SDmaDwEnErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaRpyTagErrMask_LSB 0x2C +#define QIB_7220_ErrMask_SDmaRpyTagErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDma1stDescErrMask_LSB 0x2B +#define QIB_7220_ErrMask_SDma1stDescErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaBaseErrMask_LSB 0x2A +#define QIB_7220_ErrMask_SDmaBaseErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaTailOutOfBoundErrMask_LSB 0x29 +#define QIB_7220_ErrMask_SDmaTailOutOfBoundErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaOutOfBoundErrMask_LSB 0x28 +#define QIB_7220_ErrMask_SDmaOutOfBoundErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaGenMismatchErrMask_LSB 0x27 +#define QIB_7220_ErrMask_SDmaGenMismatchErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendBufMisuseErrMask_LSB 0x26 +#define QIB_7220_ErrMask_SendBufMisuseErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendUnsupportedVLErrMask_LSB 0x25 +#define QIB_7220_ErrMask_SendUnsupportedVLErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendUnexpectedPktNumErrMask_LSB 0x24 +#define QIB_7220_ErrMask_SendUnexpectedPktNumErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendPioArmLaunchErrMask_LSB 0x23 +#define QIB_7220_ErrMask_SendPioArmLaunchErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendDroppedDataPktErrMask_LSB 0x22 +#define QIB_7220_ErrMask_SendDroppedDataPktErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendDroppedSmpPktErrMask_LSB 0x21 +#define QIB_7220_ErrMask_SendDroppedSmpPktErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendPktLenErrMask_LSB 0x20 +#define QIB_7220_ErrMask_SendPktLenErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendUnderRunErrMask_LSB 0x1F +#define QIB_7220_ErrMask_SendUnderRunErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendMaxPktLenErrMask_LSB 0x1E +#define QIB_7220_ErrMask_SendMaxPktLenErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendMinPktLenErrMask_LSB 0x1D +#define QIB_7220_ErrMask_SendMinPktLenErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaDisabledErrMask_LSB 0x1C +#define QIB_7220_ErrMask_SDmaDisabledErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendSpecialTriggerErrMask_LSB 0x1B +#define QIB_7220_ErrMask_SendSpecialTriggerErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_Reserved1_LSB 0x12 +#define QIB_7220_ErrMask_Reserved1_RMASK 0x1FF +#define QIB_7220_ErrMask_RcvIBLostLinkErrMask_LSB 0x11 +#define QIB_7220_ErrMask_RcvIBLostLinkErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvHdrErrMask_LSB 0x10 +#define QIB_7220_ErrMask_RcvHdrErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvHdrLenErrMask_LSB 0xF +#define QIB_7220_ErrMask_RcvHdrLenErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvBadTidErrMask_LSB 0xE +#define QIB_7220_ErrMask_RcvBadTidErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvHdrFullErrMask_LSB 0xD +#define QIB_7220_ErrMask_RcvHdrFullErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvEgrFullErrMask_LSB 0xC +#define QIB_7220_ErrMask_RcvEgrFullErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvBadVersionErrMask_LSB 0xB +#define QIB_7220_ErrMask_RcvBadVersionErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvIBFlowErrMask_LSB 0xA +#define QIB_7220_ErrMask_RcvIBFlowErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvEBPErrMask_LSB 0x9 +#define QIB_7220_ErrMask_RcvEBPErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvUnsupportedVLErrMask_LSB 0x8 +#define QIB_7220_ErrMask_RcvUnsupportedVLErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvUnexpectedCharErrMask_LSB 0x7 +#define QIB_7220_ErrMask_RcvUnexpectedCharErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvShortPktLenErrMask_LSB 0x6 +#define QIB_7220_ErrMask_RcvShortPktLenErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvLongPktLenErrMask_LSB 0x5 +#define QIB_7220_ErrMask_RcvLongPktLenErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvMaxPktLenErrMask_LSB 0x4 +#define QIB_7220_ErrMask_RcvMaxPktLenErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvMinPktLenErrMask_LSB 0x3 +#define QIB_7220_ErrMask_RcvMinPktLenErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvICRCErrMask_LSB 0x2 +#define QIB_7220_ErrMask_RcvICRCErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvVCRCErrMask_LSB 0x1 +#define QIB_7220_ErrMask_RcvVCRCErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvFormatErrMask_LSB 0x0 +#define QIB_7220_ErrMask_RcvFormatErrMask_RMASK 0x1 + +#define QIB_7220_ErrStatus_OFFS 0x88 +#define QIB_7220_ErrStatus_Reserved_LSB 0x36 +#define QIB_7220_ErrStatus_Reserved_RMASK 0x3FF +#define QIB_7220_ErrStatus_InvalidEEPCmdErr_LSB 0x35 +#define QIB_7220_ErrStatus_InvalidEEPCmdErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaDescAddrMisalignErr_LSB 0x34 +#define QIB_7220_ErrStatus_SDmaDescAddrMisalignErr_RMASK 0x1 +#define QIB_7220_ErrStatus_HardwareErr_LSB 0x33 +#define QIB_7220_ErrStatus_HardwareErr_RMASK 0x1 +#define QIB_7220_ErrStatus_ResetNegated_LSB 0x32 +#define QIB_7220_ErrStatus_ResetNegated_RMASK 0x1 +#define QIB_7220_ErrStatus_InvalidAddrErr_LSB 0x31 +#define QIB_7220_ErrStatus_InvalidAddrErr_RMASK 0x1 +#define QIB_7220_ErrStatus_IBStatusChanged_LSB 0x30 +#define QIB_7220_ErrStatus_IBStatusChanged_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaUnexpDataErr_LSB 0x2F +#define QIB_7220_ErrStatus_SDmaUnexpDataErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaMissingDwErr_LSB 0x2E +#define QIB_7220_ErrStatus_SDmaMissingDwErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaDwEnErr_LSB 0x2D +#define QIB_7220_ErrStatus_SDmaDwEnErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaRpyTagErr_LSB 0x2C +#define QIB_7220_ErrStatus_SDmaRpyTagErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDma1stDescErr_LSB 0x2B +#define QIB_7220_ErrStatus_SDma1stDescErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaBaseErr_LSB 0x2A +#define QIB_7220_ErrStatus_SDmaBaseErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaTailOutOfBoundErr_LSB 0x29 +#define QIB_7220_ErrStatus_SDmaTailOutOfBoundErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaOutOfBoundErr_LSB 0x28 +#define QIB_7220_ErrStatus_SDmaOutOfBoundErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaGenMismatchErr_LSB 0x27 +#define QIB_7220_ErrStatus_SDmaGenMismatchErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendBufMisuseErr_LSB 0x26 +#define QIB_7220_ErrStatus_SendBufMisuseErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendUnsupportedVLErr_LSB 0x25 +#define QIB_7220_ErrStatus_SendUnsupportedVLErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendUnexpectedPktNumErr_LSB 0x24 +#define QIB_7220_ErrStatus_SendUnexpectedPktNumErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendPioArmLaunchErr_LSB 0x23 +#define QIB_7220_ErrStatus_SendPioArmLaunchErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendDroppedDataPktErr_LSB 0x22 +#define QIB_7220_ErrStatus_SendDroppedDataPktErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendDroppedSmpPktErr_LSB 0x21 +#define QIB_7220_ErrStatus_SendDroppedSmpPktErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendPktLenErr_LSB 0x20 +#define QIB_7220_ErrStatus_SendPktLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendUnderRunErr_LSB 0x1F +#define QIB_7220_ErrStatus_SendUnderRunErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendMaxPktLenErr_LSB 0x1E +#define QIB_7220_ErrStatus_SendMaxPktLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendMinPktLenErr_LSB 0x1D +#define QIB_7220_ErrStatus_SendMinPktLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaDisabledErr_LSB 0x1C +#define QIB_7220_ErrStatus_SDmaDisabledErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendSpecialTriggerErr_LSB 0x1B +#define QIB_7220_ErrStatus_SendSpecialTriggerErr_RMASK 0x1 +#define QIB_7220_ErrStatus_Reserved1_LSB 0x12 +#define QIB_7220_ErrStatus_Reserved1_RMASK 0x1FF +#define QIB_7220_ErrStatus_RcvIBLostLinkErr_LSB 0x11 +#define QIB_7220_ErrStatus_RcvIBLostLinkErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvHdrErr_LSB 0x10 +#define QIB_7220_ErrStatus_RcvHdrErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvHdrLenErr_LSB 0xF +#define QIB_7220_ErrStatus_RcvHdrLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvBadTidErr_LSB 0xE +#define QIB_7220_ErrStatus_RcvBadTidErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvHdrFullErr_LSB 0xD +#define QIB_7220_ErrStatus_RcvHdrFullErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvEgrFullErr_LSB 0xC +#define QIB_7220_ErrStatus_RcvEgrFullErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvBadVersionErr_LSB 0xB +#define QIB_7220_ErrStatus_RcvBadVersionErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvIBFlowErr_LSB 0xA +#define QIB_7220_ErrStatus_RcvIBFlowErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvEBPErr_LSB 0x9 +#define QIB_7220_ErrStatus_RcvEBPErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvUnsupportedVLErr_LSB 0x8 +#define QIB_7220_ErrStatus_RcvUnsupportedVLErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvUnexpectedCharErr_LSB 0x7 +#define QIB_7220_ErrStatus_RcvUnexpectedCharErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvShortPktLenErr_LSB 0x6 +#define QIB_7220_ErrStatus_RcvShortPktLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvLongPktLenErr_LSB 0x5 +#define QIB_7220_ErrStatus_RcvLongPktLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvMaxPktLenErr_LSB 0x4 +#define QIB_7220_ErrStatus_RcvMaxPktLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvMinPktLenErr_LSB 0x3 +#define QIB_7220_ErrStatus_RcvMinPktLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvICRCErr_LSB 0x2 +#define QIB_7220_ErrStatus_RcvICRCErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvVCRCErr_LSB 0x1 +#define QIB_7220_ErrStatus_RcvVCRCErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvFormatErr_LSB 0x0 +#define QIB_7220_ErrStatus_RcvFormatErr_RMASK 0x1 + +#define QIB_7220_ErrClear_OFFS 0x90 +#define QIB_7220_ErrClear_Reserved_LSB 0x36 +#define QIB_7220_ErrClear_Reserved_RMASK 0x3FF +#define QIB_7220_ErrClear_InvalidEEPCmdErrClear_LSB 0x35 +#define QIB_7220_ErrClear_InvalidEEPCmdErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaDescAddrMisalignErrClear_LSB 0x34 +#define QIB_7220_ErrClear_SDmaDescAddrMisalignErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_HardwareErrClear_LSB 0x33 +#define QIB_7220_ErrClear_HardwareErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_ResetNegatedClear_LSB 0x32 +#define QIB_7220_ErrClear_ResetNegatedClear_RMASK 0x1 +#define QIB_7220_ErrClear_InvalidAddrErrClear_LSB 0x31 +#define QIB_7220_ErrClear_InvalidAddrErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_IBStatusChangedClear_LSB 0x30 +#define QIB_7220_ErrClear_IBStatusChangedClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaUnexpDataErrClear_LSB 0x2F +#define QIB_7220_ErrClear_SDmaUnexpDataErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaMissingDwErrClear_LSB 0x2E +#define QIB_7220_ErrClear_SDmaMissingDwErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaDwEnErrClear_LSB 0x2D +#define QIB_7220_ErrClear_SDmaDwEnErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaRpyTagErrClear_LSB 0x2C +#define QIB_7220_ErrClear_SDmaRpyTagErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDma1stDescErrClear_LSB 0x2B +#define QIB_7220_ErrClear_SDma1stDescErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaBaseErrClear_LSB 0x2A +#define QIB_7220_ErrClear_SDmaBaseErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaTailOutOfBoundErrClear_LSB 0x29 +#define QIB_7220_ErrClear_SDmaTailOutOfBoundErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaOutOfBoundErrClear_LSB 0x28 +#define QIB_7220_ErrClear_SDmaOutOfBoundErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaGenMismatchErrClear_LSB 0x27 +#define QIB_7220_ErrClear_SDmaGenMismatchErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendBufMisuseErrClear_LSB 0x26 +#define QIB_7220_ErrClear_SendBufMisuseErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendUnsupportedVLErrClear_LSB 0x25 +#define QIB_7220_ErrClear_SendUnsupportedVLErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendUnexpectedPktNumErrClear_LSB 0x24 +#define QIB_7220_ErrClear_SendUnexpectedPktNumErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendPioArmLaunchErrClear_LSB 0x23 +#define QIB_7220_ErrClear_SendPioArmLaunchErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendDroppedDataPktErrClear_LSB 0x22 +#define QIB_7220_ErrClear_SendDroppedDataPktErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendDroppedSmpPktErrClear_LSB 0x21 +#define QIB_7220_ErrClear_SendDroppedSmpPktErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendPktLenErrClear_LSB 0x20 +#define QIB_7220_ErrClear_SendPktLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendUnderRunErrClear_LSB 0x1F +#define QIB_7220_ErrClear_SendUnderRunErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendMaxPktLenErrClear_LSB 0x1E +#define QIB_7220_ErrClear_SendMaxPktLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendMinPktLenErrClear_LSB 0x1D +#define QIB_7220_ErrClear_SendMinPktLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaDisabledErrClear_LSB 0x1C +#define QIB_7220_ErrClear_SDmaDisabledErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendSpecialTriggerErrClear_LSB 0x1B +#define QIB_7220_ErrClear_SendSpecialTriggerErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_Reserved1_LSB 0x12 +#define QIB_7220_ErrClear_Reserved1_RMASK 0x1FF +#define QIB_7220_ErrClear_RcvIBLostLinkErrClear_LSB 0x11 +#define QIB_7220_ErrClear_RcvIBLostLinkErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvHdrErrClear_LSB 0x10 +#define QIB_7220_ErrClear_RcvHdrErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvHdrLenErrClear_LSB 0xF +#define QIB_7220_ErrClear_RcvHdrLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvBadTidErrClear_LSB 0xE +#define QIB_7220_ErrClear_RcvBadTidErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvHdrFullErrClear_LSB 0xD +#define QIB_7220_ErrClear_RcvHdrFullErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvEgrFullErrClear_LSB 0xC +#define QIB_7220_ErrClear_RcvEgrFullErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvBadVersionErrClear_LSB 0xB +#define QIB_7220_ErrClear_RcvBadVersionErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvIBFlowErrClear_LSB 0xA +#define QIB_7220_ErrClear_RcvIBFlowErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvEBPErrClear_LSB 0x9 +#define QIB_7220_ErrClear_RcvEBPErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvUnsupportedVLErrClear_LSB 0x8 +#define QIB_7220_ErrClear_RcvUnsupportedVLErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvUnexpectedCharErrClear_LSB 0x7 +#define QIB_7220_ErrClear_RcvUnexpectedCharErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvShortPktLenErrClear_LSB 0x6 +#define QIB_7220_ErrClear_RcvShortPktLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvLongPktLenErrClear_LSB 0x5 +#define QIB_7220_ErrClear_RcvLongPktLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvMaxPktLenErrClear_LSB 0x4 +#define QIB_7220_ErrClear_RcvMaxPktLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvMinPktLenErrClear_LSB 0x3 +#define QIB_7220_ErrClear_RcvMinPktLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvICRCErrClear_LSB 0x2 +#define QIB_7220_ErrClear_RcvICRCErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvVCRCErrClear_LSB 0x1 +#define QIB_7220_ErrClear_RcvVCRCErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvFormatErrClear_LSB 0x0 +#define QIB_7220_ErrClear_RcvFormatErrClear_RMASK 0x1 + +#define QIB_7220_HwErrMask_OFFS 0x98 +#define QIB_7220_HwErrMask_IBCBusFromSPCParityErrMask_LSB 0x3F +#define QIB_7220_HwErrMask_IBCBusFromSPCParityErrMask_RMASK 0x1 +#define QIB_7220_HwErrMask_IBCBusToSPCParityErrMask_LSB 0x3E +#define QIB_7220_HwErrMask_IBCBusToSPCParityErrMask_RMASK 0x1 +#define QIB_7220_HwErrMask_Clk_uC_PLLNotLockedMask_LSB 0x3D +#define QIB_7220_HwErrMask_Clk_uC_PLLNotLockedMask_RMASK 0x1 +#define QIB_7220_HwErrMask_IBSerdesPClkNotDetectMask_LSB 0x3C +#define QIB_7220_HwErrMask_IBSerdesPClkNotDetectMask_RMASK 0x1 +#define QIB_7220_HwErrMask_PCIESerdesQ3PClkNotDetectMask_LSB 0x3B +#define QIB_7220_HwErrMask_PCIESerdesQ3PClkNotDetectMask_RMASK 0x1 +#define QIB_7220_HwErrMask_PCIESerdesQ2PClkNotDetectMask_LSB 0x3A +#define QIB_7220_HwErrMask_PCIESerdesQ2PClkNotDetectMask_RMASK 0x1 +#define QIB_7220_HwErrMask_PCIESerdesQ1PClkNotDetectMask_LSB 0x39 +#define QIB_7220_HwErrMask_PCIESerdesQ1PClkNotDetectMask_RMASK 0x1 +#define QIB_7220_HwErrMask_PCIESerdesQ0PClkNotDetectMask_LSB 0x38 +#define QIB_7220_HwErrMask_PCIESerdesQ0PClkNotDetectMask_RMASK 0x1 +#define QIB_7220_HwErrMask_Reserved_LSB 0x37 +#define QIB_7220_HwErrMask_Reserved_RMASK 0x1 +#define QIB_7220_HwErrMask_PowerOnBISTFailedMask_LSB 0x36 +#define QIB_7220_HwErrMask_PowerOnBISTFailedMask_RMASK 0x1 +#define QIB_7220_HwErrMask_Reserved1_LSB 0x33 +#define QIB_7220_HwErrMask_Reserved1_RMASK 0x7 +#define QIB_7220_HwErrMask_RXEMemParityErrMask_LSB 0x2C +#define QIB_7220_HwErrMask_RXEMemParityErrMask_RMASK 0x7F +#define QIB_7220_HwErrMask_TXEMemParityErrMask_LSB 0x28 +#define QIB_7220_HwErrMask_TXEMemParityErrMask_RMASK 0xF +#define QIB_7220_HwErrMask_DDSRXEQMemoryParityErrMask_LSB 0x27 +#define QIB_7220_HwErrMask_DDSRXEQMemoryParityErrMask_RMASK 0x1 +#define QIB_7220_HwErrMask_IB_uC_MemoryParityErrMask_LSB 0x26 +#define QIB_7220_HwErrMask_IB_uC_MemoryParityErrMask_RMASK 0x1 +#define QIB_7220_HwErrMask_PCIEOct1_uC_MemoryParityErrMask_LSB 0x25 +#define QIB_7220_HwErrMask_PCIEOct1_uC_MemoryParityErrMask_RMASK 0x1 +#define QIB_7220_HwErrMask_PCIEOct0_uC_MemoryParityErrMask_LSB 0x24 +#define QIB_7220_HwErrMask_PCIEOct0_uC_MemoryParityErrMask_RMASK 0x1 +#define QIB_7220_HwErrMask_Reserved2_LSB 0x22 +#define QIB_7220_HwErrMask_Reserved2_RMASK 0x3 +#define QIB_7220_HwErrMask_PCIeBusParityErrMask_LSB 0x1F +#define QIB_7220_HwErrMask_PCIeBusParityErrMask_RMASK 0x7 +#define QIB_7220_HwErrMask_PcieCplTimeoutMask_LSB 0x1E +#define QIB_7220_HwErrMask_PcieCplTimeoutMask_RMASK 0x1 +#define QIB_7220_HwErrMask_PoisonedTLPMask_LSB 0x1D +#define QIB_7220_HwErrMask_PoisonedTLPMask_RMASK 0x1 +#define QIB_7220_HwErrMask_SDmaMemReadErrMask_LSB 0x1C +#define QIB_7220_HwErrMask_SDmaMemReadErrMask_RMASK 0x1 +#define QIB_7220_HwErrMask_Reserved3_LSB 0x8 +#define QIB_7220_HwErrMask_Reserved3_RMASK 0xFFFFF +#define QIB_7220_HwErrMask_PCIeMemParityErrMask_LSB 0x0 +#define QIB_7220_HwErrMask_PCIeMemParityErrMask_RMASK 0xFF + +#define QIB_7220_HwErrStatus_OFFS 0xA0 +#define QIB_7220_HwErrStatus_IBCBusFromSPCParityErr_LSB 0x3F +#define QIB_7220_HwErrStatus_IBCBusFromSPCParityErr_RMASK 0x1 +#define QIB_7220_HwErrStatus_IBCBusToSPCParityErr_LSB 0x3E +#define QIB_7220_HwErrStatus_IBCBusToSPCParityErr_RMASK 0x1 +#define QIB_7220_HwErrStatus_Clk_uC_PLLNotLocked_LSB 0x3D +#define QIB_7220_HwErrStatus_Clk_uC_PLLNotLocked_RMASK 0x1 +#define QIB_7220_HwErrStatus_IBSerdesPClkNotDetect_LSB 0x3C +#define QIB_7220_HwErrStatus_IBSerdesPClkNotDetect_RMASK 0x1 +#define QIB_7220_HwErrStatus_PCIESerdesQ3PClkNotDetect_LSB 0x3B +#define QIB_7220_HwErrStatus_PCIESerdesQ3PClkNotDetect_RMASK 0x1 +#define QIB_7220_HwErrStatus_PCIESerdesQ2PClkNotDetect_LSB 0x3A +#define QIB_7220_HwErrStatus_PCIESerdesQ2PClkNotDetect_RMASK 0x1 +#define QIB_7220_HwErrStatus_PCIESerdesQ1PClkNotDetect_LSB 0x39 +#define QIB_7220_HwErrStatus_PCIESerdesQ1PClkNotDetect_RMASK 0x1 +#define QIB_7220_HwErrStatus_PCIESerdesQ0PClkNotDetect_LSB 0x38 +#define QIB_7220_HwErrStatus_PCIESerdesQ0PClkNotDetect_RMASK 0x1 +#define QIB_7220_HwErrStatus_Reserved_LSB 0x37 +#define QIB_7220_HwErrStatus_Reserved_RMASK 0x1 +#define QIB_7220_HwErrStatus_PowerOnBISTFailed_LSB 0x36 +#define QIB_7220_HwErrStatus_PowerOnBISTFailed_RMASK 0x1 +#define QIB_7220_HwErrStatus_Reserved1_LSB 0x33 +#define QIB_7220_HwErrStatus_Reserved1_RMASK 0x7 +#define QIB_7220_HwErrStatus_RXEMemParity_LSB 0x2C +#define QIB_7220_HwErrStatus_RXEMemParity_RMASK 0x7F +#define QIB_7220_HwErrStatus_TXEMemParity_LSB 0x28 +#define QIB_7220_HwErrStatus_TXEMemParity_RMASK 0xF +#define QIB_7220_HwErrStatus_DDSRXEQMemoryParityErr_LSB 0x27 +#define QIB_7220_HwErrStatus_DDSRXEQMemoryParityErr_RMASK 0x1 +#define QIB_7220_HwErrStatus_IB_uC_MemoryParityErr_LSB 0x26 +#define QIB_7220_HwErrStatus_IB_uC_MemoryParityErr_RMASK 0x1 +#define QIB_7220_HwErrStatus_PCIE_uC_Oct1MemoryParityErr_LSB 0x25 +#define QIB_7220_HwErrStatus_PCIE_uC_Oct1MemoryParityErr_RMASK 0x1 +#define QIB_7220_HwErrStatus_PCIE_uC_Oct0MemoryParityErr_LSB 0x24 +#define QIB_7220_HwErrStatus_PCIE_uC_Oct0MemoryParityErr_RMASK 0x1 +#define QIB_7220_HwErrStatus_Reserved2_LSB 0x22 +#define QIB_7220_HwErrStatus_Reserved2_RMASK 0x3 +#define QIB_7220_HwErrStatus_PCIeBusParity_LSB 0x1F +#define QIB_7220_HwErrStatus_PCIeBusParity_RMASK 0x7 +#define QIB_7220_HwErrStatus_PcieCplTimeout_LSB 0x1E +#define QIB_7220_HwErrStatus_PcieCplTimeout_RMASK 0x1 +#define QIB_7220_HwErrStatus_PoisenedTLP_LSB 0x1D +#define QIB_7220_HwErrStatus_PoisenedTLP_RMASK 0x1 +#define QIB_7220_HwErrStatus_SDmaMemReadErr_LSB 0x1C +#define QIB_7220_HwErrStatus_SDmaMemReadErr_RMASK 0x1 +#define QIB_7220_HwErrStatus_Reserved3_LSB 0x8 +#define QIB_7220_HwErrStatus_Reserved3_RMASK 0xFFFFF +#define QIB_7220_HwErrStatus_PCIeMemParity_LSB 0x0 +#define QIB_7220_HwErrStatus_PCIeMemParity_RMASK 0xFF + +#define QIB_7220_HwErrClear_OFFS 0xA8 +#define QIB_7220_HwErrClear_IBCBusFromSPCParityErrClear_LSB 0x3F +#define QIB_7220_HwErrClear_IBCBusFromSPCParityErrClear_RMASK 0x1 +#define QIB_7220_HwErrClear_IBCBusToSPCparityErrClear_LSB 0x3E +#define QIB_7220_HwErrClear_IBCBusToSPCparityErrClear_RMASK 0x1 +#define QIB_7220_HwErrClear_Clk_uC_PLLNotLockedClear_LSB 0x3D +#define QIB_7220_HwErrClear_Clk_uC_PLLNotLockedClear_RMASK 0x1 +#define QIB_7220_HwErrClear_IBSerdesPClkNotDetectClear_LSB 0x3C +#define QIB_7220_HwErrClear_IBSerdesPClkNotDetectClear_RMASK 0x1 +#define QIB_7220_HwErrClear_PCIESerdesQ3PClkNotDetectClear_LSB 0x3B +#define QIB_7220_HwErrClear_PCIESerdesQ3PClkNotDetectClear_RMASK 0x1 +#define QIB_7220_HwErrClear_PCIESerdesQ2PClkNotDetectClear_LSB 0x3A +#define QIB_7220_HwErrClear_PCIESerdesQ2PClkNotDetectClear_RMASK 0x1 +#define QIB_7220_HwErrClear_PCIESerdesQ1PClkNotDetectClear_LSB 0x39 +#define QIB_7220_HwErrClear_PCIESerdesQ1PClkNotDetectClear_RMASK 0x1 +#define QIB_7220_HwErrClear_PCIESerdesQ0PClkNotDetectClear_LSB 0x38 +#define QIB_7220_HwErrClear_PCIESerdesQ0PClkNotDetectClear_RMASK 0x1 +#define QIB_7220_HwErrClear_Reserved_LSB 0x37 +#define QIB_7220_HwErrClear_Reserved_RMASK 0x1 +#define QIB_7220_HwErrClear_PowerOnBISTFailedClear_LSB 0x36 +#define QIB_7220_HwErrClear_PowerOnBISTFailedClear_RMASK 0x1 +#define QIB_7220_HwErrClear_Reserved1_LSB 0x33 +#define QIB_7220_HwErrClear_Reserved1_RMASK 0x7 +#define QIB_7220_HwErrClear_RXEMemParityClear_LSB 0x2C +#define QIB_7220_HwErrClear_RXEMemParityClear_RMASK 0x7F +#define QIB_7220_HwErrClear_TXEMemParityClear_LSB 0x28 +#define QIB_7220_HwErrClear_TXEMemParityClear_RMASK 0xF +#define QIB_7220_HwErrClear_DDSRXEQMemoryParityErrClear_LSB 0x27 +#define QIB_7220_HwErrClear_DDSRXEQMemoryParityErrClear_RMASK 0x1 +#define QIB_7220_HwErrClear_IB_uC_MemoryParityErrClear_LSB 0x26 +#define QIB_7220_HwErrClear_IB_uC_MemoryParityErrClear_RMASK 0x1 +#define QIB_7220_HwErrClear_PCIE_uC_Oct1MemoryParityErrClear_LSB 0x25 +#define QIB_7220_HwErrClear_PCIE_uC_Oct1MemoryParityErrClear_RMASK 0x1 +#define QIB_7220_HwErrClear_PCIE_uC_Oct0MemoryParityErrClear_LSB 0x24 +#define QIB_7220_HwErrClear_PCIE_uC_Oct0MemoryParityErrClear_RMASK 0x1 +#define QIB_7220_HwErrClear_Reserved2_LSB 0x22 +#define QIB_7220_HwErrClear_Reserved2_RMASK 0x3 +#define QIB_7220_HwErrClear_PCIeBusParityClr_LSB 0x1F +#define QIB_7220_HwErrClear_PCIeBusParityClr_RMASK 0x7 +#define QIB_7220_HwErrClear_PcieCplTimeoutClear_LSB 0x1E +#define QIB_7220_HwErrClear_PcieCplTimeoutClear_RMASK 0x1 +#define QIB_7220_HwErrClear_PoisonedTLPClear_LSB 0x1D +#define QIB_7220_HwErrClear_PoisonedTLPClear_RMASK 0x1 +#define QIB_7220_HwErrClear_SDmaMemReadErrClear_LSB 0x1C +#define QIB_7220_HwErrClear_SDmaMemReadErrClear_RMASK 0x1 +#define QIB_7220_HwErrClear_Reserved3_LSB 0x8 +#define QIB_7220_HwErrClear_Reserved3_RMASK 0xFFFFF +#define QIB_7220_HwErrClear_PCIeMemParityClr_LSB 0x0 +#define QIB_7220_HwErrClear_PCIeMemParityClr_RMASK 0xFF + +#define QIB_7220_HwDiagCtrl_OFFS 0xB0 +#define QIB_7220_HwDiagCtrl_ForceIBCBusFromSPCParityErr_LSB 0x3F +#define QIB_7220_HwDiagCtrl_ForceIBCBusFromSPCParityErr_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_ForceIBCBusToSPCParityErr_LSB 0x3E +#define QIB_7220_HwDiagCtrl_ForceIBCBusToSPCParityErr_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_CounterWrEnable_LSB 0x3D +#define QIB_7220_HwDiagCtrl_CounterWrEnable_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_CounterDisable_LSB 0x3C +#define QIB_7220_HwDiagCtrl_CounterDisable_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_Reserved_LSB 0x33 +#define QIB_7220_HwDiagCtrl_Reserved_RMASK 0x1FF +#define QIB_7220_HwDiagCtrl_ForceRxMemParityErr_LSB 0x2C +#define QIB_7220_HwDiagCtrl_ForceRxMemParityErr_RMASK 0x7F +#define QIB_7220_HwDiagCtrl_ForceTxMemparityErr_LSB 0x28 +#define QIB_7220_HwDiagCtrl_ForceTxMemparityErr_RMASK 0xF +#define QIB_7220_HwDiagCtrl_ForceDDSRXEQMemoryParityErr_LSB 0x27 +#define QIB_7220_HwDiagCtrl_ForceDDSRXEQMemoryParityErr_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_ForceIB_uC_MemoryParityErr_LSB 0x26 +#define QIB_7220_HwDiagCtrl_ForceIB_uC_MemoryParityErr_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct1MemoryParityErr_LSB 0x25 +#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct1MemoryParityErr_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct0MemoryParityErr_LSB 0x24 +#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct0MemoryParityErr_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_Reserved1_LSB 0x23 +#define QIB_7220_HwDiagCtrl_Reserved1_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_forcePCIeBusParity_LSB 0x1F +#define QIB_7220_HwDiagCtrl_forcePCIeBusParity_RMASK 0xF +#define QIB_7220_HwDiagCtrl_Reserved2_LSB 0x8 +#define QIB_7220_HwDiagCtrl_Reserved2_RMASK 0x7FFFFF +#define QIB_7220_HwDiagCtrl_forcePCIeMemParity_LSB 0x0 +#define QIB_7220_HwDiagCtrl_forcePCIeMemParity_RMASK 0xFF + +#define QIB_7220_REG_0000B8_OFFS 0xB8 + +#define QIB_7220_IBCStatus_OFFS 0xC0 +#define QIB_7220_IBCStatus_TxCreditOk_LSB 0x1F +#define QIB_7220_IBCStatus_TxCreditOk_RMASK 0x1 +#define QIB_7220_IBCStatus_TxReady_LSB 0x1E +#define QIB_7220_IBCStatus_TxReady_RMASK 0x1 +#define QIB_7220_IBCStatus_Reserved_LSB 0xE +#define QIB_7220_IBCStatus_Reserved_RMASK 0xFFFF +#define QIB_7220_IBCStatus_IBTxLaneReversed_LSB 0xD +#define QIB_7220_IBCStatus_IBTxLaneReversed_RMASK 0x1 +#define QIB_7220_IBCStatus_IBRxLaneReversed_LSB 0xC +#define QIB_7220_IBCStatus_IBRxLaneReversed_RMASK 0x1 +#define QIB_7220_IBCStatus_IB_SERDES_TRIM_DONE_LSB 0xB +#define QIB_7220_IBCStatus_IB_SERDES_TRIM_DONE_RMASK 0x1 +#define QIB_7220_IBCStatus_DDS_RXEQ_FAIL_LSB 0xA +#define QIB_7220_IBCStatus_DDS_RXEQ_FAIL_RMASK 0x1 +#define QIB_7220_IBCStatus_LinkWidthActive_LSB 0x9 +#define QIB_7220_IBCStatus_LinkWidthActive_RMASK 0x1 +#define QIB_7220_IBCStatus_LinkSpeedActive_LSB 0x8 +#define QIB_7220_IBCStatus_LinkSpeedActive_RMASK 0x1 +#define QIB_7220_IBCStatus_LinkState_LSB 0x5 +#define QIB_7220_IBCStatus_LinkState_RMASK 0x7 +#define QIB_7220_IBCStatus_LinkTrainingState_LSB 0x0 +#define QIB_7220_IBCStatus_LinkTrainingState_RMASK 0x1F + +#define QIB_7220_IBCCtrl_OFFS 0xC8 +#define QIB_7220_IBCCtrl_Loopback_LSB 0x3F +#define QIB_7220_IBCCtrl_Loopback_RMASK 0x1 +#define QIB_7220_IBCCtrl_LinkDownDefaultState_LSB 0x3E +#define QIB_7220_IBCCtrl_LinkDownDefaultState_RMASK 0x1 +#define QIB_7220_IBCCtrl_Reserved_LSB 0x2B +#define QIB_7220_IBCCtrl_Reserved_RMASK 0x7FFFF +#define QIB_7220_IBCCtrl_CreditScale_LSB 0x28 +#define QIB_7220_IBCCtrl_CreditScale_RMASK 0x7 +#define QIB_7220_IBCCtrl_OverrunThreshold_LSB 0x24 +#define QIB_7220_IBCCtrl_OverrunThreshold_RMASK 0xF +#define QIB_7220_IBCCtrl_PhyerrThreshold_LSB 0x20 +#define QIB_7220_IBCCtrl_PhyerrThreshold_RMASK 0xF +#define QIB_7220_IBCCtrl_MaxPktLen_LSB 0x15 +#define QIB_7220_IBCCtrl_MaxPktLen_RMASK 0x7FF +#define QIB_7220_IBCCtrl_LinkCmd_LSB 0x13 +#define QIB_7220_IBCCtrl_LinkCmd_RMASK 0x3 +#define QIB_7220_IBCCtrl_LinkInitCmd_LSB 0x10 +#define QIB_7220_IBCCtrl_LinkInitCmd_RMASK 0x7 +#define QIB_7220_IBCCtrl_FlowCtrlWaterMark_LSB 0x8 +#define QIB_7220_IBCCtrl_FlowCtrlWaterMark_RMASK 0xFF +#define QIB_7220_IBCCtrl_FlowCtrlPeriod_LSB 0x0 +#define QIB_7220_IBCCtrl_FlowCtrlPeriod_RMASK 0xFF + +#define QIB_7220_EXTStatus_OFFS 0xD0 +#define QIB_7220_EXTStatus_GPIOIn_LSB 0x30 +#define QIB_7220_EXTStatus_GPIOIn_RMASK 0xFFFF +#define QIB_7220_EXTStatus_Reserved_LSB 0x20 +#define QIB_7220_EXTStatus_Reserved_RMASK 0xFFFF +#define QIB_7220_EXTStatus_Reserved1_LSB 0x10 +#define QIB_7220_EXTStatus_Reserved1_RMASK 0xFFFF +#define QIB_7220_EXTStatus_MemBISTDisabled_LSB 0xF +#define QIB_7220_EXTStatus_MemBISTDisabled_RMASK 0x1 +#define QIB_7220_EXTStatus_MemBISTEndTest_LSB 0xE +#define QIB_7220_EXTStatus_MemBISTEndTest_RMASK 0x1 +#define QIB_7220_EXTStatus_Reserved2_LSB 0x0 +#define QIB_7220_EXTStatus_Reserved2_RMASK 0x3FFF + +#define QIB_7220_EXTCtrl_OFFS 0xD8 +#define QIB_7220_EXTCtrl_GPIOOe_LSB 0x30 +#define QIB_7220_EXTCtrl_GPIOOe_RMASK 0xFFFF +#define QIB_7220_EXTCtrl_GPIOInvert_LSB 0x20 +#define QIB_7220_EXTCtrl_GPIOInvert_RMASK 0xFFFF +#define QIB_7220_EXTCtrl_Reserved_LSB 0x4 +#define QIB_7220_EXTCtrl_Reserved_RMASK 0xFFFFFFF +#define QIB_7220_EXTCtrl_LEDPriPortGreenOn_LSB 0x3 +#define QIB_7220_EXTCtrl_LEDPriPortGreenOn_RMASK 0x1 +#define QIB_7220_EXTCtrl_LEDPriPortYellowOn_LSB 0x2 +#define QIB_7220_EXTCtrl_LEDPriPortYellowOn_RMASK 0x1 +#define QIB_7220_EXTCtrl_LEDGblOkGreenOn_LSB 0x1 +#define QIB_7220_EXTCtrl_LEDGblOkGreenOn_RMASK 0x1 +#define QIB_7220_EXTCtrl_LEDGblErrRedOff_LSB 0x0 +#define QIB_7220_EXTCtrl_LEDGblErrRedOff_RMASK 0x1 + +#define QIB_7220_GPIOOut_OFFS 0xE0 + +#define QIB_7220_GPIOMask_OFFS 0xE8 + +#define QIB_7220_GPIOStatus_OFFS 0xF0 + +#define QIB_7220_GPIOClear_OFFS 0xF8 + +#define QIB_7220_RcvCtrl_OFFS 0x100 +#define QIB_7220_RcvCtrl_Reserved_LSB 0x27 +#define QIB_7220_RcvCtrl_Reserved_RMASK 0x1FFFFFF +#define QIB_7220_RcvCtrl_RcvQPMapEnable_LSB 0x26 +#define QIB_7220_RcvCtrl_RcvQPMapEnable_RMASK 0x1 +#define QIB_7220_RcvCtrl_PortCfg_LSB 0x24 +#define QIB_7220_RcvCtrl_PortCfg_RMASK 0x3 +#define QIB_7220_RcvCtrl_TailUpd_LSB 0x23 +#define QIB_7220_RcvCtrl_TailUpd_RMASK 0x1 +#define QIB_7220_RcvCtrl_RcvPartitionKeyDisable_LSB 0x22 +#define QIB_7220_RcvCtrl_RcvPartitionKeyDisable_RMASK 0x1 +#define QIB_7220_RcvCtrl_IntrAvail_LSB 0x11 +#define QIB_7220_RcvCtrl_IntrAvail_RMASK 0x1FFFF +#define QIB_7220_RcvCtrl_PortEnable_LSB 0x0 +#define QIB_7220_RcvCtrl_PortEnable_RMASK 0x1FFFF + +#define QIB_7220_RcvBTHQP_OFFS 0x108 +#define QIB_7220_RcvBTHQP_Reserved_LSB 0x18 +#define QIB_7220_RcvBTHQP_Reserved_RMASK 0xFF +#define QIB_7220_RcvBTHQP_RcvBTHQP_LSB 0x0 +#define QIB_7220_RcvBTHQP_RcvBTHQP_RMASK 0xFFFFFF + +#define QIB_7220_RcvHdrSize_OFFS 0x110 + +#define QIB_7220_RcvHdrCnt_OFFS 0x118 + +#define QIB_7220_RcvHdrEntSize_OFFS 0x120 + +#define QIB_7220_RcvTIDBase_OFFS 0x128 + +#define QIB_7220_RcvTIDCnt_OFFS 0x130 + +#define QIB_7220_RcvEgrBase_OFFS 0x138 + +#define QIB_7220_RcvEgrCnt_OFFS 0x140 + +#define QIB_7220_RcvBufBase_OFFS 0x148 + +#define QIB_7220_RcvBufSize_OFFS 0x150 + +#define QIB_7220_RxIntMemBase_OFFS 0x158 + +#define QIB_7220_RxIntMemSize_OFFS 0x160 + +#define QIB_7220_RcvPartitionKey_OFFS 0x168 + +#define QIB_7220_RcvQPMulticastPort_OFFS 0x170 +#define QIB_7220_RcvQPMulticastPort_Reserved_LSB 0x5 +#define QIB_7220_RcvQPMulticastPort_Reserved_RMASK 0x7FFFFFFFFFFFFFF +#define QIB_7220_RcvQPMulticastPort_RcvQpMcPort_LSB 0x0 +#define QIB_7220_RcvQPMulticastPort_RcvQpMcPort_RMASK 0x1F + +#define QIB_7220_RcvPktLEDCnt_OFFS 0x178 +#define QIB_7220_RcvPktLEDCnt_ONperiod_LSB 0x20 +#define QIB_7220_RcvPktLEDCnt_ONperiod_RMASK 0xFFFFFFFF +#define QIB_7220_RcvPktLEDCnt_OFFperiod_LSB 0x0 +#define QIB_7220_RcvPktLEDCnt_OFFperiod_RMASK 0xFFFFFFFF + +#define QIB_7220_IBCDDRCtrl_OFFS 0x180 +#define QIB_7220_IBCDDRCtrl_IB_DLID_MASK_LSB 0x30 +#define QIB_7220_IBCDDRCtrl_IB_DLID_MASK_RMASK 0xFFFF +#define QIB_7220_IBCDDRCtrl_IB_DLID_LSB 0x20 +#define QIB_7220_IBCDDRCtrl_IB_DLID_RMASK 0xFFFF +#define QIB_7220_IBCDDRCtrl_Reserved_LSB 0x1B +#define QIB_7220_IBCDDRCtrl_Reserved_RMASK 0x1F +#define QIB_7220_IBCDDRCtrl_HRTBT_REQ_LSB 0x1A +#define QIB_7220_IBCDDRCtrl_HRTBT_REQ_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_HRTBT_PORT_LSB 0x12 +#define QIB_7220_IBCDDRCtrl_HRTBT_PORT_RMASK 0xFF +#define QIB_7220_IBCDDRCtrl_HRTBT_AUTO_LSB 0x11 +#define QIB_7220_IBCDDRCtrl_HRTBT_AUTO_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_HRTBT_ENB_LSB 0x10 +#define QIB_7220_IBCDDRCtrl_HRTBT_ENB_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_SD_DDS_LSB 0xC +#define QIB_7220_IBCDDRCtrl_SD_DDS_RMASK 0xF +#define QIB_7220_IBCDDRCtrl_SD_DDSV_LSB 0xB +#define QIB_7220_IBCDDRCtrl_SD_DDSV_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_SD_ADD_ENB_LSB 0xA +#define QIB_7220_IBCDDRCtrl_SD_ADD_ENB_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_SD_RX_EQUAL_ENABLE_LSB 0x9 +#define QIB_7220_IBCDDRCtrl_SD_RX_EQUAL_ENABLE_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_IB_LANE_REV_SUPPORTED_LSB 0x8 +#define QIB_7220_IBCDDRCtrl_IB_LANE_REV_SUPPORTED_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_IB_POLARITY_REV_SUPP_LSB 0x7 +#define QIB_7220_IBCDDRCtrl_IB_POLARITY_REV_SUPP_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_IB_NUM_CHANNELS_LSB 0x5 +#define QIB_7220_IBCDDRCtrl_IB_NUM_CHANNELS_RMASK 0x3 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_QDR_LSB 0x4 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_QDR_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_DDR_LSB 0x3 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_DDR_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_SDR_LSB 0x2 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_SDR_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_LSB 0x1 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_IB_ENHANCED_MODE_LSB 0x0 +#define QIB_7220_IBCDDRCtrl_IB_ENHANCED_MODE_RMASK 0x1 + +#define QIB_7220_HRTBT_GUID_OFFS 0x188 + +#define QIB_7220_IBCDDRCtrl2_OFFS 0x1A0 +#define QIB_7220_IBCDDRCtrl2_IB_BACK_PORCH_LSB 0x5 +#define QIB_7220_IBCDDRCtrl2_IB_BACK_PORCH_RMASK 0x1F +#define QIB_7220_IBCDDRCtrl2_IB_FRONT_PORCH_LSB 0x0 +#define QIB_7220_IBCDDRCtrl2_IB_FRONT_PORCH_RMASK 0x1F + +#define QIB_7220_IBCDDRStatus_OFFS 0x1A8 +#define QIB_7220_IBCDDRStatus_heartbeat_timed_out_LSB 0x24 +#define QIB_7220_IBCDDRStatus_heartbeat_timed_out_RMASK 0x1 +#define QIB_7220_IBCDDRStatus_heartbeat_crosstalk_LSB 0x20 +#define QIB_7220_IBCDDRStatus_heartbeat_crosstalk_RMASK 0xF +#define QIB_7220_IBCDDRStatus_RxEqLocalDevice_LSB 0x1E +#define QIB_7220_IBCDDRStatus_RxEqLocalDevice_RMASK 0x3 +#define QIB_7220_IBCDDRStatus_ReqDDSLocalFromRmt_LSB 0x1A +#define QIB_7220_IBCDDRStatus_ReqDDSLocalFromRmt_RMASK 0xF +#define QIB_7220_IBCDDRStatus_LinkRoundTripLatency_LSB 0x0 +#define QIB_7220_IBCDDRStatus_LinkRoundTripLatency_RMASK 0x3FFFFFF + +#define QIB_7220_JIntReload_OFFS 0x1B0 +#define QIB_7220_JIntReload_J_limit_reload_LSB 0x10 +#define QIB_7220_JIntReload_J_limit_reload_RMASK 0xFFFF +#define QIB_7220_JIntReload_J_reload_LSB 0x0 +#define QIB_7220_JIntReload_J_reload_RMASK 0xFFFF + +#define QIB_7220_IBNCModeCtrl_OFFS 0x1B8 +#define QIB_7220_IBNCModeCtrl_Reserved_LSB 0x1A +#define QIB_7220_IBNCModeCtrl_Reserved_RMASK 0x3FFFFFFFFF +#define QIB_7220_IBNCModeCtrl_TSMCode_TS2_LSB 0x11 +#define QIB_7220_IBNCModeCtrl_TSMCode_TS2_RMASK 0x1FF +#define QIB_7220_IBNCModeCtrl_TSMCode_TS1_LSB 0x8 +#define QIB_7220_IBNCModeCtrl_TSMCode_TS1_RMASK 0x1FF +#define QIB_7220_IBNCModeCtrl_Reserved1_LSB 0x3 +#define QIB_7220_IBNCModeCtrl_Reserved1_RMASK 0x1F +#define QIB_7220_IBNCModeCtrl_TSMEnable_ignore_TSM_on_rx_LSB 0x2 +#define QIB_7220_IBNCModeCtrl_TSMEnable_ignore_TSM_on_rx_RMASK 0x1 +#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS2_LSB 0x1 +#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS2_RMASK 0x1 +#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS1_LSB 0x0 +#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS1_RMASK 0x1 + +#define QIB_7220_SendCtrl_OFFS 0x1C0 +#define QIB_7220_SendCtrl_Disarm_LSB 0x1F +#define QIB_7220_SendCtrl_Disarm_RMASK 0x1 +#define QIB_7220_SendCtrl_Reserved_LSB 0x1D +#define QIB_7220_SendCtrl_Reserved_RMASK 0x3 +#define QIB_7220_SendCtrl_AvailUpdThld_LSB 0x18 +#define QIB_7220_SendCtrl_AvailUpdThld_RMASK 0x1F +#define QIB_7220_SendCtrl_DisarmPIOBuf_LSB 0x10 +#define QIB_7220_SendCtrl_DisarmPIOBuf_RMASK 0xFF +#define QIB_7220_SendCtrl_Reserved1_LSB 0xD +#define QIB_7220_SendCtrl_Reserved1_RMASK 0x7 +#define QIB_7220_SendCtrl_SDmaHalt_LSB 0xC +#define QIB_7220_SendCtrl_SDmaHalt_RMASK 0x1 +#define QIB_7220_SendCtrl_SDmaEnable_LSB 0xB +#define QIB_7220_SendCtrl_SDmaEnable_RMASK 0x1 +#define QIB_7220_SendCtrl_SDmaSingleDescriptor_LSB 0xA +#define QIB_7220_SendCtrl_SDmaSingleDescriptor_RMASK 0x1 +#define QIB_7220_SendCtrl_SDmaIntEnable_LSB 0x9 +#define QIB_7220_SendCtrl_SDmaIntEnable_RMASK 0x1 +#define QIB_7220_SendCtrl_Reserved2_LSB 0x5 +#define QIB_7220_SendCtrl_Reserved2_RMASK 0xF +#define QIB_7220_SendCtrl_SSpecialTriggerEn_LSB 0x4 +#define QIB_7220_SendCtrl_SSpecialTriggerEn_RMASK 0x1 +#define QIB_7220_SendCtrl_SPioEnable_LSB 0x3 +#define QIB_7220_SendCtrl_SPioEnable_RMASK 0x1 +#define QIB_7220_SendCtrl_SendBufAvailUpd_LSB 0x2 +#define QIB_7220_SendCtrl_SendBufAvailUpd_RMASK 0x1 +#define QIB_7220_SendCtrl_SendIntBufAvail_LSB 0x1 +#define QIB_7220_SendCtrl_SendIntBufAvail_RMASK 0x1 +#define QIB_7220_SendCtrl_Abort_LSB 0x0 +#define QIB_7220_SendCtrl_Abort_RMASK 0x1 + +#define QIB_7220_SendBufBase_OFFS 0x1C8 +#define QIB_7220_SendBufBase_Reserved_LSB 0x35 +#define QIB_7220_SendBufBase_Reserved_RMASK 0x7FF +#define QIB_7220_SendBufBase_BaseAddr_LargePIO_LSB 0x20 +#define QIB_7220_SendBufBase_BaseAddr_LargePIO_RMASK 0x1FFFFF +#define QIB_7220_SendBufBase_Reserved1_LSB 0x15 +#define QIB_7220_SendBufBase_Reserved1_RMASK 0x7FF +#define QIB_7220_SendBufBase_BaseAddr_SmallPIO_LSB 0x0 +#define QIB_7220_SendBufBase_BaseAddr_SmallPIO_RMASK 0x1FFFFF + +#define QIB_7220_SendBufSize_OFFS 0x1D0 +#define QIB_7220_SendBufSize_Reserved_LSB 0x2D +#define QIB_7220_SendBufSize_Reserved_RMASK 0xFFFFF +#define QIB_7220_SendBufSize_Size_LargePIO_LSB 0x20 +#define QIB_7220_SendBufSize_Size_LargePIO_RMASK 0x1FFF +#define QIB_7220_SendBufSize_Reserved1_LSB 0xC +#define QIB_7220_SendBufSize_Reserved1_RMASK 0xFFFFF +#define QIB_7220_SendBufSize_Size_SmallPIO_LSB 0x0 +#define QIB_7220_SendBufSize_Size_SmallPIO_RMASK 0xFFF + +#define QIB_7220_SendBufCnt_OFFS 0x1D8 +#define QIB_7220_SendBufCnt_Reserved_LSB 0x24 +#define QIB_7220_SendBufCnt_Reserved_RMASK 0xFFFFFFF +#define QIB_7220_SendBufCnt_Num_LargeBuffers_LSB 0x20 +#define QIB_7220_SendBufCnt_Num_LargeBuffers_RMASK 0xF +#define QIB_7220_SendBufCnt_Reserved1_LSB 0x9 +#define QIB_7220_SendBufCnt_Reserved1_RMASK 0x7FFFFF +#define QIB_7220_SendBufCnt_Num_SmallBuffers_LSB 0x0 +#define QIB_7220_SendBufCnt_Num_SmallBuffers_RMASK 0x1FF + +#define QIB_7220_SendBufAvailAddr_OFFS 0x1E0 +#define QIB_7220_SendBufAvailAddr_SendBufAvailAddr_LSB 0x6 +#define QIB_7220_SendBufAvailAddr_SendBufAvailAddr_RMASK 0x3FFFFFFFF +#define QIB_7220_SendBufAvailAddr_Reserved_LSB 0x0 +#define QIB_7220_SendBufAvailAddr_Reserved_RMASK 0x3F + +#define QIB_7220_TxIntMemBase_OFFS 0x1E8 + +#define QIB_7220_TxIntMemSize_OFFS 0x1F0 + +#define QIB_7220_SendDmaBase_OFFS 0x1F8 +#define QIB_7220_SendDmaBase_Reserved_LSB 0x30 +#define QIB_7220_SendDmaBase_Reserved_RMASK 0xFFFF +#define QIB_7220_SendDmaBase_SendDmaBase_LSB 0x0 +#define QIB_7220_SendDmaBase_SendDmaBase_RMASK 0xFFFFFFFFFFFF + +#define QIB_7220_SendDmaLenGen_OFFS 0x200 +#define QIB_7220_SendDmaLenGen_Reserved_LSB 0x13 +#define QIB_7220_SendDmaLenGen_Reserved_RMASK 0x1FFFFFFFFFFF +#define QIB_7220_SendDmaLenGen_Generation_LSB 0x10 +#define QIB_7220_SendDmaLenGen_Generation_MSB 0x12 +#define QIB_7220_SendDmaLenGen_Generation_RMASK 0x7 +#define QIB_7220_SendDmaLenGen_Length_LSB 0x0 +#define QIB_7220_SendDmaLenGen_Length_RMASK 0xFFFF + +#define QIB_7220_SendDmaTail_OFFS 0x208 +#define QIB_7220_SendDmaTail_Reserved_LSB 0x10 +#define QIB_7220_SendDmaTail_Reserved_RMASK 0xFFFFFFFFFFFF +#define QIB_7220_SendDmaTail_SendDmaTail_LSB 0x0 +#define QIB_7220_SendDmaTail_SendDmaTail_RMASK 0xFFFF + +#define QIB_7220_SendDmaHead_OFFS 0x210 +#define QIB_7220_SendDmaHead_Reserved_LSB 0x30 +#define QIB_7220_SendDmaHead_Reserved_RMASK 0xFFFF +#define QIB_7220_SendDmaHead_InternalSendDmaHead_LSB 0x20 +#define QIB_7220_SendDmaHead_InternalSendDmaHead_RMASK 0xFFFF +#define QIB_7220_SendDmaHead_Reserved1_LSB 0x10 +#define QIB_7220_SendDmaHead_Reserved1_RMASK 0xFFFF +#define QIB_7220_SendDmaHead_SendDmaHead_LSB 0x0 +#define QIB_7220_SendDmaHead_SendDmaHead_RMASK 0xFFFF + +#define QIB_7220_SendDmaHeadAddr_OFFS 0x218 +#define QIB_7220_SendDmaHeadAddr_Reserved_LSB 0x30 +#define QIB_7220_SendDmaHeadAddr_Reserved_RMASK 0xFFFF +#define QIB_7220_SendDmaHeadAddr_SendDmaHeadAddr_LSB 0x0 +#define QIB_7220_SendDmaHeadAddr_SendDmaHeadAddr_RMASK 0xFFFFFFFFFFFF + +#define QIB_7220_SendDmaBufMask0_OFFS 0x220 +#define QIB_7220_SendDmaBufMask0_BufMask_63_0_LSB 0x0 +#define QIB_7220_SendDmaBufMask0_BufMask_63_0_RMASK 0x0 + +#define QIB_7220_SendDmaStatus_OFFS 0x238 +#define QIB_7220_SendDmaStatus_ScoreBoardDrainInProg_LSB 0x3F +#define QIB_7220_SendDmaStatus_ScoreBoardDrainInProg_RMASK 0x1 +#define QIB_7220_SendDmaStatus_AbortInProg_LSB 0x3E +#define QIB_7220_SendDmaStatus_AbortInProg_RMASK 0x1 +#define QIB_7220_SendDmaStatus_InternalSDmaEnable_LSB 0x3D +#define QIB_7220_SendDmaStatus_InternalSDmaEnable_RMASK 0x1 +#define QIB_7220_SendDmaStatus_ScbDescIndex_13_0_LSB 0x2F +#define QIB_7220_SendDmaStatus_ScbDescIndex_13_0_RMASK 0x3FFF +#define QIB_7220_SendDmaStatus_RpyLowAddr_6_0_LSB 0x28 +#define QIB_7220_SendDmaStatus_RpyLowAddr_6_0_RMASK 0x7F +#define QIB_7220_SendDmaStatus_RpyTag_7_0_LSB 0x20 +#define QIB_7220_SendDmaStatus_RpyTag_7_0_RMASK 0xFF +#define QIB_7220_SendDmaStatus_ScbFull_LSB 0x1F +#define QIB_7220_SendDmaStatus_ScbFull_RMASK 0x1 +#define QIB_7220_SendDmaStatus_ScbEmpty_LSB 0x1E +#define QIB_7220_SendDmaStatus_ScbEmpty_RMASK 0x1 +#define QIB_7220_SendDmaStatus_ScbEntryValid_LSB 0x1D +#define QIB_7220_SendDmaStatus_ScbEntryValid_RMASK 0x1 +#define QIB_7220_SendDmaStatus_ScbFetchDescFlag_LSB 0x1C +#define QIB_7220_SendDmaStatus_ScbFetchDescFlag_RMASK 0x1 +#define QIB_7220_SendDmaStatus_SplFifoReadyToGo_LSB 0x1B +#define QIB_7220_SendDmaStatus_SplFifoReadyToGo_RMASK 0x1 +#define QIB_7220_SendDmaStatus_SplFifoDisarmed_LSB 0x1A +#define QIB_7220_SendDmaStatus_SplFifoDisarmed_RMASK 0x1 +#define QIB_7220_SendDmaStatus_SplFifoEmpty_LSB 0x19 +#define QIB_7220_SendDmaStatus_SplFifoEmpty_RMASK 0x1 +#define QIB_7220_SendDmaStatus_SplFifoFull_LSB 0x18 +#define QIB_7220_SendDmaStatus_SplFifoFull_RMASK 0x1 +#define QIB_7220_SendDmaStatus_SplFifoBufNum_LSB 0x10 +#define QIB_7220_SendDmaStatus_SplFifoBufNum_RMASK 0xFF +#define QIB_7220_SendDmaStatus_SplFifoDescIndex_LSB 0x0 +#define QIB_7220_SendDmaStatus_SplFifoDescIndex_RMASK 0xFFFF + +#define QIB_7220_SendBufErr0_OFFS 0x240 +#define QIB_7220_SendBufErr0_SendBufErr_63_0_LSB 0x0 +#define QIB_7220_SendBufErr0_SendBufErr_63_0_RMASK 0x0 + +#define QIB_7220_RcvHdrAddr0_OFFS 0x270 +#define QIB_7220_RcvHdrAddr0_RcvHdrAddr0_LSB 0x2 +#define QIB_7220_RcvHdrAddr0_RcvHdrAddr0_RMASK 0x3FFFFFFFFF +#define QIB_7220_RcvHdrAddr0_Reserved_LSB 0x0 +#define QIB_7220_RcvHdrAddr0_Reserved_RMASK 0x3 + +#define QIB_7220_RcvHdrTailAddr0_OFFS 0x300 +#define QIB_7220_RcvHdrTailAddr0_RcvHdrTailAddr0_LSB 0x2 +#define QIB_7220_RcvHdrTailAddr0_RcvHdrTailAddr0_RMASK 0x3FFFFFFFFF +#define QIB_7220_RcvHdrTailAddr0_Reserved_LSB 0x0 +#define QIB_7220_RcvHdrTailAddr0_Reserved_RMASK 0x3 + +#define QIB_7220_ibsd_epb_access_ctrl_OFFS 0x3C0 +#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_granted_LSB 0x8 +#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_granted_RMASK 0x1 +#define QIB_7220_ibsd_epb_access_ctrl_Reserved_LSB 0x1 +#define QIB_7220_ibsd_epb_access_ctrl_Reserved_RMASK 0x7F +#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_LSB 0x0 +#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_RMASK 0x1 + +#define QIB_7220_ibsd_epb_transaction_reg_OFFS 0x3C8 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_rdy_LSB 0x1F +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_rdy_RMASK 0x1 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_req_error_LSB 0x1E +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_req_error_RMASK 0x1 +#define QIB_7220_ibsd_epb_transaction_reg_Reserved_LSB 0x1D +#define QIB_7220_ibsd_epb_transaction_reg_Reserved_RMASK 0x1 +#define QIB_7220_ibsd_epb_transaction_reg_mem_data_parity_LSB 0x1C +#define QIB_7220_ibsd_epb_transaction_reg_mem_data_parity_RMASK 0x1 +#define QIB_7220_ibsd_epb_transaction_reg_Reserved1_LSB 0x1B +#define QIB_7220_ibsd_epb_transaction_reg_Reserved1_RMASK 0x1 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_cs_LSB 0x19 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_cs_RMASK 0x3 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_read_write_LSB 0x18 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_read_write_RMASK 0x1 +#define QIB_7220_ibsd_epb_transaction_reg_Reserved2_LSB 0x17 +#define QIB_7220_ibsd_epb_transaction_reg_Reserved2_RMASK 0x1 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_address_LSB 0x8 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_address_RMASK 0x7FFF +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_data_LSB 0x0 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_data_RMASK 0xFF + +#define QIB_7220_XGXSCfg_OFFS 0x3D8 +#define QIB_7220_XGXSCfg_sel_link_down_for_fctrl_lane_sync_reset_LSB 0x3F +#define QIB_7220_XGXSCfg_sel_link_down_for_fctrl_lane_sync_reset_RMASK 0x1 +#define QIB_7220_XGXSCfg_Reserved_LSB 0x13 +#define QIB_7220_XGXSCfg_Reserved_RMASK 0xFFFFFFFFFFF +#define QIB_7220_XGXSCfg_link_sync_mask_LSB 0x9 +#define QIB_7220_XGXSCfg_link_sync_mask_RMASK 0x3FF +#define QIB_7220_XGXSCfg_Reserved1_LSB 0x3 +#define QIB_7220_XGXSCfg_Reserved1_RMASK 0x3F +#define QIB_7220_XGXSCfg_xcv_reset_LSB 0x2 +#define QIB_7220_XGXSCfg_xcv_reset_RMASK 0x1 +#define QIB_7220_XGXSCfg_Reserved2_LSB 0x1 +#define QIB_7220_XGXSCfg_Reserved2_RMASK 0x1 +#define QIB_7220_XGXSCfg_tx_rx_reset_LSB 0x0 +#define QIB_7220_XGXSCfg_tx_rx_reset_RMASK 0x1 + +#define QIB_7220_IBSerDesCtrl_OFFS 0x3E0 +#define QIB_7220_IBSerDesCtrl_Reserved_LSB 0x2D +#define QIB_7220_IBSerDesCtrl_Reserved_RMASK 0x7FFFF +#define QIB_7220_IBSerDesCtrl_INT_uC_LSB 0x2C +#define QIB_7220_IBSerDesCtrl_INT_uC_RMASK 0x1 +#define QIB_7220_IBSerDesCtrl_CKSEL_uC_LSB 0x2A +#define QIB_7220_IBSerDesCtrl_CKSEL_uC_RMASK 0x3 +#define QIB_7220_IBSerDesCtrl_PLLN_LSB 0x28 +#define QIB_7220_IBSerDesCtrl_PLLN_RMASK 0x3 +#define QIB_7220_IBSerDesCtrl_PLLM_LSB 0x25 +#define QIB_7220_IBSerDesCtrl_PLLM_RMASK 0x7 +#define QIB_7220_IBSerDesCtrl_TXOBPD_LSB 0x24 +#define QIB_7220_IBSerDesCtrl_TXOBPD_RMASK 0x1 +#define QIB_7220_IBSerDesCtrl_TWC_LSB 0x23 +#define QIB_7220_IBSerDesCtrl_TWC_RMASK 0x1 +#define QIB_7220_IBSerDesCtrl_RXIDLE_LSB 0x22 +#define QIB_7220_IBSerDesCtrl_RXIDLE_RMASK 0x1 +#define QIB_7220_IBSerDesCtrl_RXINV_LSB 0x21 +#define QIB_7220_IBSerDesCtrl_RXINV_RMASK 0x1 +#define QIB_7220_IBSerDesCtrl_TXINV_LSB 0x20 +#define QIB_7220_IBSerDesCtrl_TXINV_RMASK 0x1 +#define QIB_7220_IBSerDesCtrl_Reserved1_LSB 0x12 +#define QIB_7220_IBSerDesCtrl_Reserved1_RMASK 0x3FFF +#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForRXEQ_LSB 0xD +#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForRXEQ_RMASK 0x1F +#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForDDS_LSB 0x8 +#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForDDS_RMASK 0x1F +#define QIB_7220_IBSerDesCtrl_Reserved2_LSB 0x1 +#define QIB_7220_IBSerDesCtrl_Reserved2_RMASK 0x7F +#define QIB_7220_IBSerDesCtrl_ResetIB_uC_Core_LSB 0x0 +#define QIB_7220_IBSerDesCtrl_ResetIB_uC_Core_RMASK 0x1 + +#define QIB_7220_pciesd_epb_access_ctrl_OFFS 0x400 +#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_granted_LSB 0x8 +#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_granted_RMASK 0x1 +#define QIB_7220_pciesd_epb_access_ctrl_Reserved_LSB 0x3 +#define QIB_7220_pciesd_epb_access_ctrl_Reserved_RMASK 0x1F +#define QIB_7220_pciesd_epb_access_ctrl_sw_pcieepb_star_en_LSB 0x1 +#define QIB_7220_pciesd_epb_access_ctrl_sw_pcieepb_star_en_RMASK 0x3 +#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_LSB 0x0 +#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_RMASK 0x1 + +#define QIB_7220_pciesd_epb_transaction_reg_OFFS 0x408 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_rdy_LSB 0x1F +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_rdy_RMASK 0x1 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_req_error_LSB 0x1E +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_req_error_RMASK 0x1 +#define QIB_7220_pciesd_epb_transaction_reg_Reserved_LSB 0x1D +#define QIB_7220_pciesd_epb_transaction_reg_Reserved_RMASK 0x1 +#define QIB_7220_pciesd_epb_transaction_reg_mem_data_parity_LSB 0x1C +#define QIB_7220_pciesd_epb_transaction_reg_mem_data_parity_RMASK 0x1 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_cs_LSB 0x19 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_cs_RMASK 0x7 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_read_write_LSB 0x18 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_read_write_RMASK 0x1 +#define QIB_7220_pciesd_epb_transaction_reg_Reserved1_LSB 0x17 +#define QIB_7220_pciesd_epb_transaction_reg_Reserved1_RMASK 0x1 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_address_LSB 0x8 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_address_RMASK 0x7FFF +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_data_LSB 0x0 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_data_RMASK 0xFF + +#define QIB_7220_SerDes_DDSRXEQ0_OFFS 0x500 +#define QIB_7220_SerDes_DDSRXEQ0_reg_addr_LSB 0x4 +#define QIB_7220_SerDes_DDSRXEQ0_reg_addr_RMASK 0x3F +#define QIB_7220_SerDes_DDSRXEQ0_element_num_LSB 0x0 +#define QIB_7220_SerDes_DDSRXEQ0_element_num_RMASK 0xF + +#define QIB_7220_LBIntCnt_OFFS 0x13000 + +#define QIB_7220_LBFlowStallCnt_OFFS 0x13008 + +#define QIB_7220_TxSDmaDescCnt_OFFS 0x13010 + +#define QIB_7220_TxUnsupVLErrCnt_OFFS 0x13018 + +#define QIB_7220_TxDataPktCnt_OFFS 0x13020 + +#define QIB_7220_TxFlowPktCnt_OFFS 0x13028 + +#define QIB_7220_TxDwordCnt_OFFS 0x13030 + +#define QIB_7220_TxLenErrCnt_OFFS 0x13038 + +#define QIB_7220_TxMaxMinLenErrCnt_OFFS 0x13040 + +#define QIB_7220_TxUnderrunCnt_OFFS 0x13048 + +#define QIB_7220_TxFlowStallCnt_OFFS 0x13050 + +#define QIB_7220_TxDroppedPktCnt_OFFS 0x13058 + +#define QIB_7220_RxDroppedPktCnt_OFFS 0x13060 + +#define QIB_7220_RxDataPktCnt_OFFS 0x13068 + +#define QIB_7220_RxFlowPktCnt_OFFS 0x13070 + +#define QIB_7220_RxDwordCnt_OFFS 0x13078 + +#define QIB_7220_RxLenErrCnt_OFFS 0x13080 + +#define QIB_7220_RxMaxMinLenErrCnt_OFFS 0x13088 + +#define QIB_7220_RxICRCErrCnt_OFFS 0x13090 + +#define QIB_7220_RxVCRCErrCnt_OFFS 0x13098 + +#define QIB_7220_RxFlowCtrlViolCnt_OFFS 0x130A0 + +#define QIB_7220_RxVersionErrCnt_OFFS 0x130A8 + +#define QIB_7220_RxLinkMalformCnt_OFFS 0x130B0 + +#define QIB_7220_RxEBPCnt_OFFS 0x130B8 + +#define QIB_7220_RxLPCRCErrCnt_OFFS 0x130C0 + +#define QIB_7220_RxBufOvflCnt_OFFS 0x130C8 + +#define QIB_7220_RxTIDFullErrCnt_OFFS 0x130D0 + +#define QIB_7220_RxTIDValidErrCnt_OFFS 0x130D8 + +#define QIB_7220_RxPKeyMismatchCnt_OFFS 0x130E0 + +#define QIB_7220_RxP0HdrEgrOvflCnt_OFFS 0x130E8 + +#define QIB_7220_IBStatusChangeCnt_OFFS 0x13170 + +#define QIB_7220_IBLinkErrRecoveryCnt_OFFS 0x13178 + +#define QIB_7220_IBLinkDownedCnt_OFFS 0x13180 + +#define QIB_7220_IBSymbolErrCnt_OFFS 0x13188 + +#define QIB_7220_RxVL15DroppedPktCnt_OFFS 0x13190 + +#define QIB_7220_RxOtherLocalPhyErrCnt_OFFS 0x13198 + +#define QIB_7220_PcieRetryBufDiagQwordCnt_OFFS 0x131A0 + +#define QIB_7220_ExcessBufferOvflCnt_OFFS 0x131A8 + +#define QIB_7220_LocalLinkIntegrityErrCnt_OFFS 0x131B0 + +#define QIB_7220_RxVlErrCnt_OFFS 0x131B8 + +#define QIB_7220_RxDlidFltrCnt_OFFS 0x131C0 + +#define QIB_7220_CNT_0131C8_OFFS 0x131C8 + +#define QIB_7220_PSStat_OFFS 0x13200 + +#define QIB_7220_PSStart_OFFS 0x13208 + +#define QIB_7220_PSInterval_OFFS 0x13210 + +#define QIB_7220_PSRcvDataCount_OFFS 0x13218 + +#define QIB_7220_PSRcvPktsCount_OFFS 0x13220 + +#define QIB_7220_PSXmitDataCount_OFFS 0x13228 + +#define QIB_7220_PSXmitPktsCount_OFFS 0x13230 + +#define QIB_7220_PSXmitWaitCount_OFFS 0x13238 + +#define QIB_7220_CNT_013240_OFFS 0x13240 + +#define QIB_7220_RcvEgrArray_OFFS 0x14000 + +#define QIB_7220_MEM_038000_OFFS 0x38000 + +#define QIB_7220_RcvTIDArray0_OFFS 0x53000 + +#define QIB_7220_PIOLaunchFIFO_OFFS 0x64000 + +#define QIB_7220_MEM_064480_OFFS 0x64480 + +#define QIB_7220_SendPIOpbcCache_OFFS 0x64800 + +#define QIB_7220_MEM_064C80_OFFS 0x64C80 + +#define QIB_7220_PreLaunchFIFO_OFFS 0x65000 + +#define QIB_7220_MEM_065080_OFFS 0x65080 + +#define QIB_7220_ScoreBoard_OFFS 0x65400 + +#define QIB_7220_MEM_065440_OFFS 0x65440 + +#define QIB_7220_DescriptorFIFO_OFFS 0x65800 + +#define QIB_7220_MEM_065880_OFFS 0x65880 + +#define QIB_7220_RcvBuf1_OFFS 0x72000 + +#define QIB_7220_MEM_074800_OFFS 0x74800 + +#define QIB_7220_RcvBuf2_OFFS 0x75000 + +#define QIB_7220_MEM_076400_OFFS 0x76400 + +#define QIB_7220_RcvFlags_OFFS 0x77000 + +#define QIB_7220_MEM_078400_OFFS 0x78400 + +#define QIB_7220_RcvLookupBuf1_OFFS 0x79000 + +#define QIB_7220_MEM_07A400_OFFS 0x7A400 + +#define QIB_7220_RcvDMADatBuf_OFFS 0x7B000 + +#define QIB_7220_RcvDMAHdrBuf_OFFS 0x7B800 + +#define QIB_7220_MiscRXEIntMem_OFFS 0x7C000 + +#define QIB_7220_MEM_07D400_OFFS 0x7D400 + +#define QIB_7220_PCIERcvBuf_OFFS 0x80000 + +#define QIB_7220_PCIERetryBuf_OFFS 0x84000 + +#define QIB_7220_PCIERcvBufRdToWrAddr_OFFS 0x88000 + +#define QIB_7220_PCIECplBuf_OFFS 0x90000 + +#define QIB_7220_IBSerDesMappTable_OFFS 0x94000 + +#define QIB_7220_MEM_095000_OFFS 0x95000 + +#define QIB_7220_SendBuf0_MA_OFFS 0x100000 + +#define QIB_7220_MEM_1A0000_OFFS 0x1A0000 diff --git a/drivers/infiniband/hw/qib/qib_7322_regs.h b/drivers/infiniband/hw/qib/qib_7322_regs.h new file mode 100644 index 00000000..32dc81ff --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_7322_regs.h @@ -0,0 +1,3163 @@ +/* + * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* This file is mechanically generated from RTL. Any hand-edits will be lost! */ + +#define QIB_7322_Revision_OFFS 0x0 +#define QIB_7322_Revision_DEF 0x0000000002010601 +#define QIB_7322_Revision_R_Simulator_LSB 0x3F +#define QIB_7322_Revision_R_Simulator_MSB 0x3F +#define QIB_7322_Revision_R_Simulator_RMASK 0x1 +#define QIB_7322_Revision_R_Emulation_LSB 0x3E +#define QIB_7322_Revision_R_Emulation_MSB 0x3E +#define QIB_7322_Revision_R_Emulation_RMASK 0x1 +#define QIB_7322_Revision_R_Emulation_Revcode_LSB 0x28 +#define QIB_7322_Revision_R_Emulation_Revcode_MSB 0x3D +#define QIB_7322_Revision_R_Emulation_Revcode_RMASK 0x3FFFFF +#define QIB_7322_Revision_BoardID_LSB 0x20 +#define QIB_7322_Revision_BoardID_MSB 0x27 +#define QIB_7322_Revision_BoardID_RMASK 0xFF +#define QIB_7322_Revision_R_SW_LSB 0x18 +#define QIB_7322_Revision_R_SW_MSB 0x1F +#define QIB_7322_Revision_R_SW_RMASK 0xFF +#define QIB_7322_Revision_R_Arch_LSB 0x10 +#define QIB_7322_Revision_R_Arch_MSB 0x17 +#define QIB_7322_Revision_R_Arch_RMASK 0xFF +#define QIB_7322_Revision_R_ChipRevMajor_LSB 0x8 +#define QIB_7322_Revision_R_ChipRevMajor_MSB 0xF +#define QIB_7322_Revision_R_ChipRevMajor_RMASK 0xFF +#define QIB_7322_Revision_R_ChipRevMinor_LSB 0x0 +#define QIB_7322_Revision_R_ChipRevMinor_MSB 0x7 +#define QIB_7322_Revision_R_ChipRevMinor_RMASK 0xFF + +#define QIB_7322_Control_OFFS 0x8 +#define QIB_7322_Control_DEF 0x0000000000000000 +#define QIB_7322_Control_PCIECplQDiagEn_LSB 0x6 +#define QIB_7322_Control_PCIECplQDiagEn_MSB 0x6 +#define QIB_7322_Control_PCIECplQDiagEn_RMASK 0x1 +#define QIB_7322_Control_PCIEPostQDiagEn_LSB 0x5 +#define QIB_7322_Control_PCIEPostQDiagEn_MSB 0x5 +#define QIB_7322_Control_PCIEPostQDiagEn_RMASK 0x1 +#define QIB_7322_Control_SDmaDescFetchPriorityEn_LSB 0x4 +#define QIB_7322_Control_SDmaDescFetchPriorityEn_MSB 0x4 +#define QIB_7322_Control_SDmaDescFetchPriorityEn_RMASK 0x1 +#define QIB_7322_Control_PCIERetryBufDiagEn_LSB 0x3 +#define QIB_7322_Control_PCIERetryBufDiagEn_MSB 0x3 +#define QIB_7322_Control_PCIERetryBufDiagEn_RMASK 0x1 +#define QIB_7322_Control_FreezeMode_LSB 0x1 +#define QIB_7322_Control_FreezeMode_MSB 0x1 +#define QIB_7322_Control_FreezeMode_RMASK 0x1 +#define QIB_7322_Control_SyncReset_LSB 0x0 +#define QIB_7322_Control_SyncReset_MSB 0x0 +#define QIB_7322_Control_SyncReset_RMASK 0x1 + +#define QIB_7322_PageAlign_OFFS 0x10 +#define QIB_7322_PageAlign_DEF 0x0000000000001000 + +#define QIB_7322_ContextCnt_OFFS 0x18 +#define QIB_7322_ContextCnt_DEF 0x0000000000000012 + +#define QIB_7322_Scratch_OFFS 0x20 +#define QIB_7322_Scratch_DEF 0x0000000000000000 + +#define QIB_7322_CntrRegBase_OFFS 0x28 +#define QIB_7322_CntrRegBase_DEF 0x0000000000011000 + +#define QIB_7322_SendRegBase_OFFS 0x30 +#define QIB_7322_SendRegBase_DEF 0x0000000000003000 + +#define QIB_7322_UserRegBase_OFFS 0x38 +#define QIB_7322_UserRegBase_DEF 0x0000000000200000 + +#define QIB_7322_IntMask_OFFS 0x68 +#define QIB_7322_IntMask_DEF 0x0000000000000000 +#define QIB_7322_IntMask_SDmaIntMask_1_LSB 0x3F +#define QIB_7322_IntMask_SDmaIntMask_1_MSB 0x3F +#define QIB_7322_IntMask_SDmaIntMask_1_RMASK 0x1 +#define QIB_7322_IntMask_SDmaIntMask_0_LSB 0x3E +#define QIB_7322_IntMask_SDmaIntMask_0_MSB 0x3E +#define QIB_7322_IntMask_SDmaIntMask_0_RMASK 0x1 +#define QIB_7322_IntMask_SDmaProgressIntMask_1_LSB 0x3D +#define QIB_7322_IntMask_SDmaProgressIntMask_1_MSB 0x3D +#define QIB_7322_IntMask_SDmaProgressIntMask_1_RMASK 0x1 +#define QIB_7322_IntMask_SDmaProgressIntMask_0_LSB 0x3C +#define QIB_7322_IntMask_SDmaProgressIntMask_0_MSB 0x3C +#define QIB_7322_IntMask_SDmaProgressIntMask_0_RMASK 0x1 +#define QIB_7322_IntMask_SDmaIdleIntMask_1_LSB 0x3B +#define QIB_7322_IntMask_SDmaIdleIntMask_1_MSB 0x3B +#define QIB_7322_IntMask_SDmaIdleIntMask_1_RMASK 0x1 +#define QIB_7322_IntMask_SDmaIdleIntMask_0_LSB 0x3A +#define QIB_7322_IntMask_SDmaIdleIntMask_0_MSB 0x3A +#define QIB_7322_IntMask_SDmaIdleIntMask_0_RMASK 0x1 +#define QIB_7322_IntMask_SDmaCleanupDoneMask_1_LSB 0x39 +#define QIB_7322_IntMask_SDmaCleanupDoneMask_1_MSB 0x39 +#define QIB_7322_IntMask_SDmaCleanupDoneMask_1_RMASK 0x1 +#define QIB_7322_IntMask_SDmaCleanupDoneMask_0_LSB 0x38 +#define QIB_7322_IntMask_SDmaCleanupDoneMask_0_MSB 0x38 +#define QIB_7322_IntMask_SDmaCleanupDoneMask_0_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg17IntMask_LSB 0x31 +#define QIB_7322_IntMask_RcvUrg17IntMask_MSB 0x31 +#define QIB_7322_IntMask_RcvUrg17IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg16IntMask_LSB 0x30 +#define QIB_7322_IntMask_RcvUrg16IntMask_MSB 0x30 +#define QIB_7322_IntMask_RcvUrg16IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg15IntMask_LSB 0x2F +#define QIB_7322_IntMask_RcvUrg15IntMask_MSB 0x2F +#define QIB_7322_IntMask_RcvUrg15IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg14IntMask_LSB 0x2E +#define QIB_7322_IntMask_RcvUrg14IntMask_MSB 0x2E +#define QIB_7322_IntMask_RcvUrg14IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg13IntMask_LSB 0x2D +#define QIB_7322_IntMask_RcvUrg13IntMask_MSB 0x2D +#define QIB_7322_IntMask_RcvUrg13IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg12IntMask_LSB 0x2C +#define QIB_7322_IntMask_RcvUrg12IntMask_MSB 0x2C +#define QIB_7322_IntMask_RcvUrg12IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg11IntMask_LSB 0x2B +#define QIB_7322_IntMask_RcvUrg11IntMask_MSB 0x2B +#define QIB_7322_IntMask_RcvUrg11IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg10IntMask_LSB 0x2A +#define QIB_7322_IntMask_RcvUrg10IntMask_MSB 0x2A +#define QIB_7322_IntMask_RcvUrg10IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg9IntMask_LSB 0x29 +#define QIB_7322_IntMask_RcvUrg9IntMask_MSB 0x29 +#define QIB_7322_IntMask_RcvUrg9IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg8IntMask_LSB 0x28 +#define QIB_7322_IntMask_RcvUrg8IntMask_MSB 0x28 +#define QIB_7322_IntMask_RcvUrg8IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg7IntMask_LSB 0x27 +#define QIB_7322_IntMask_RcvUrg7IntMask_MSB 0x27 +#define QIB_7322_IntMask_RcvUrg7IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg6IntMask_LSB 0x26 +#define QIB_7322_IntMask_RcvUrg6IntMask_MSB 0x26 +#define QIB_7322_IntMask_RcvUrg6IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg5IntMask_LSB 0x25 +#define QIB_7322_IntMask_RcvUrg5IntMask_MSB 0x25 +#define QIB_7322_IntMask_RcvUrg5IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg4IntMask_LSB 0x24 +#define QIB_7322_IntMask_RcvUrg4IntMask_MSB 0x24 +#define QIB_7322_IntMask_RcvUrg4IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg3IntMask_LSB 0x23 +#define QIB_7322_IntMask_RcvUrg3IntMask_MSB 0x23 +#define QIB_7322_IntMask_RcvUrg3IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg2IntMask_LSB 0x22 +#define QIB_7322_IntMask_RcvUrg2IntMask_MSB 0x22 +#define QIB_7322_IntMask_RcvUrg2IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg1IntMask_LSB 0x21 +#define QIB_7322_IntMask_RcvUrg1IntMask_MSB 0x21 +#define QIB_7322_IntMask_RcvUrg1IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg0IntMask_LSB 0x20 +#define QIB_7322_IntMask_RcvUrg0IntMask_MSB 0x20 +#define QIB_7322_IntMask_RcvUrg0IntMask_RMASK 0x1 +#define QIB_7322_IntMask_ErrIntMask_1_LSB 0x1F +#define QIB_7322_IntMask_ErrIntMask_1_MSB 0x1F +#define QIB_7322_IntMask_ErrIntMask_1_RMASK 0x1 +#define QIB_7322_IntMask_ErrIntMask_0_LSB 0x1E +#define QIB_7322_IntMask_ErrIntMask_0_MSB 0x1E +#define QIB_7322_IntMask_ErrIntMask_0_RMASK 0x1 +#define QIB_7322_IntMask_ErrIntMask_LSB 0x1D +#define QIB_7322_IntMask_ErrIntMask_MSB 0x1D +#define QIB_7322_IntMask_ErrIntMask_RMASK 0x1 +#define QIB_7322_IntMask_AssertGPIOIntMask_LSB 0x1C +#define QIB_7322_IntMask_AssertGPIOIntMask_MSB 0x1C +#define QIB_7322_IntMask_AssertGPIOIntMask_RMASK 0x1 +#define QIB_7322_IntMask_SendDoneIntMask_1_LSB 0x19 +#define QIB_7322_IntMask_SendDoneIntMask_1_MSB 0x19 +#define QIB_7322_IntMask_SendDoneIntMask_1_RMASK 0x1 +#define QIB_7322_IntMask_SendDoneIntMask_0_LSB 0x18 +#define QIB_7322_IntMask_SendDoneIntMask_0_MSB 0x18 +#define QIB_7322_IntMask_SendDoneIntMask_0_RMASK 0x1 +#define QIB_7322_IntMask_SendBufAvailIntMask_LSB 0x17 +#define QIB_7322_IntMask_SendBufAvailIntMask_MSB 0x17 +#define QIB_7322_IntMask_SendBufAvailIntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail17IntMask_LSB 0x11 +#define QIB_7322_IntMask_RcvAvail17IntMask_MSB 0x11 +#define QIB_7322_IntMask_RcvAvail17IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail16IntMask_LSB 0x10 +#define QIB_7322_IntMask_RcvAvail16IntMask_MSB 0x10 +#define QIB_7322_IntMask_RcvAvail16IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail15IntMask_LSB 0xF +#define QIB_7322_IntMask_RcvAvail15IntMask_MSB 0xF +#define QIB_7322_IntMask_RcvAvail15IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail14IntMask_LSB 0xE +#define QIB_7322_IntMask_RcvAvail14IntMask_MSB 0xE +#define QIB_7322_IntMask_RcvAvail14IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail13IntMask_LSB 0xD +#define QIB_7322_IntMask_RcvAvail13IntMask_MSB 0xD +#define QIB_7322_IntMask_RcvAvail13IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail12IntMask_LSB 0xC +#define QIB_7322_IntMask_RcvAvail12IntMask_MSB 0xC +#define QIB_7322_IntMask_RcvAvail12IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail11IntMask_LSB 0xB +#define QIB_7322_IntMask_RcvAvail11IntMask_MSB 0xB +#define QIB_7322_IntMask_RcvAvail11IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail10IntMask_LSB 0xA +#define QIB_7322_IntMask_RcvAvail10IntMask_MSB 0xA +#define QIB_7322_IntMask_RcvAvail10IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail9IntMask_LSB 0x9 +#define QIB_7322_IntMask_RcvAvail9IntMask_MSB 0x9 +#define QIB_7322_IntMask_RcvAvail9IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail8IntMask_LSB 0x8 +#define QIB_7322_IntMask_RcvAvail8IntMask_MSB 0x8 +#define QIB_7322_IntMask_RcvAvail8IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail7IntMask_LSB 0x7 +#define QIB_7322_IntMask_RcvAvail7IntMask_MSB 0x7 +#define QIB_7322_IntMask_RcvAvail7IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail6IntMask_LSB 0x6 +#define QIB_7322_IntMask_RcvAvail6IntMask_MSB 0x6 +#define QIB_7322_IntMask_RcvAvail6IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail5IntMask_LSB 0x5 +#define QIB_7322_IntMask_RcvAvail5IntMask_MSB 0x5 +#define QIB_7322_IntMask_RcvAvail5IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail4IntMask_LSB 0x4 +#define QIB_7322_IntMask_RcvAvail4IntMask_MSB 0x4 +#define QIB_7322_IntMask_RcvAvail4IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail3IntMask_LSB 0x3 +#define QIB_7322_IntMask_RcvAvail3IntMask_MSB 0x3 +#define QIB_7322_IntMask_RcvAvail3IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail2IntMask_LSB 0x2 +#define QIB_7322_IntMask_RcvAvail2IntMask_MSB 0x2 +#define QIB_7322_IntMask_RcvAvail2IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail1IntMask_LSB 0x1 +#define QIB_7322_IntMask_RcvAvail1IntMask_MSB 0x1 +#define QIB_7322_IntMask_RcvAvail1IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail0IntMask_LSB 0x0 +#define QIB_7322_IntMask_RcvAvail0IntMask_MSB 0x0 +#define QIB_7322_IntMask_RcvAvail0IntMask_RMASK 0x1 + +#define QIB_7322_IntStatus_OFFS 0x70 +#define QIB_7322_IntStatus_DEF 0x0000000000000000 +#define QIB_7322_IntStatus_SDmaInt_1_LSB 0x3F +#define QIB_7322_IntStatus_SDmaInt_1_MSB 0x3F +#define QIB_7322_IntStatus_SDmaInt_1_RMASK 0x1 +#define QIB_7322_IntStatus_SDmaInt_0_LSB 0x3E +#define QIB_7322_IntStatus_SDmaInt_0_MSB 0x3E +#define QIB_7322_IntStatus_SDmaInt_0_RMASK 0x1 +#define QIB_7322_IntStatus_SDmaProgressInt_1_LSB 0x3D +#define QIB_7322_IntStatus_SDmaProgressInt_1_MSB 0x3D +#define QIB_7322_IntStatus_SDmaProgressInt_1_RMASK 0x1 +#define QIB_7322_IntStatus_SDmaProgressInt_0_LSB 0x3C +#define QIB_7322_IntStatus_SDmaProgressInt_0_MSB 0x3C +#define QIB_7322_IntStatus_SDmaProgressInt_0_RMASK 0x1 +#define QIB_7322_IntStatus_SDmaIdleInt_1_LSB 0x3B +#define QIB_7322_IntStatus_SDmaIdleInt_1_MSB 0x3B +#define QIB_7322_IntStatus_SDmaIdleInt_1_RMASK 0x1 +#define QIB_7322_IntStatus_SDmaIdleInt_0_LSB 0x3A +#define QIB_7322_IntStatus_SDmaIdleInt_0_MSB 0x3A +#define QIB_7322_IntStatus_SDmaIdleInt_0_RMASK 0x1 +#define QIB_7322_IntStatus_SDmaCleanupDone_1_LSB 0x39 +#define QIB_7322_IntStatus_SDmaCleanupDone_1_MSB 0x39 +#define QIB_7322_IntStatus_SDmaCleanupDone_1_RMASK 0x1 +#define QIB_7322_IntStatus_SDmaCleanupDone_0_LSB 0x38 +#define QIB_7322_IntStatus_SDmaCleanupDone_0_MSB 0x38 +#define QIB_7322_IntStatus_SDmaCleanupDone_0_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg17_LSB 0x31 +#define QIB_7322_IntStatus_RcvUrg17_MSB 0x31 +#define QIB_7322_IntStatus_RcvUrg17_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg16_LSB 0x30 +#define QIB_7322_IntStatus_RcvUrg16_MSB 0x30 +#define QIB_7322_IntStatus_RcvUrg16_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg15_LSB 0x2F +#define QIB_7322_IntStatus_RcvUrg15_MSB 0x2F +#define QIB_7322_IntStatus_RcvUrg15_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg14_LSB 0x2E +#define QIB_7322_IntStatus_RcvUrg14_MSB 0x2E +#define QIB_7322_IntStatus_RcvUrg14_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg13_LSB 0x2D +#define QIB_7322_IntStatus_RcvUrg13_MSB 0x2D +#define QIB_7322_IntStatus_RcvUrg13_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg12_LSB 0x2C +#define QIB_7322_IntStatus_RcvUrg12_MSB 0x2C +#define QIB_7322_IntStatus_RcvUrg12_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg11_LSB 0x2B +#define QIB_7322_IntStatus_RcvUrg11_MSB 0x2B +#define QIB_7322_IntStatus_RcvUrg11_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg10_LSB 0x2A +#define QIB_7322_IntStatus_RcvUrg10_MSB 0x2A +#define QIB_7322_IntStatus_RcvUrg10_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg9_LSB 0x29 +#define QIB_7322_IntStatus_RcvUrg9_MSB 0x29 +#define QIB_7322_IntStatus_RcvUrg9_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg8_LSB 0x28 +#define QIB_7322_IntStatus_RcvUrg8_MSB 0x28 +#define QIB_7322_IntStatus_RcvUrg8_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg7_LSB 0x27 +#define QIB_7322_IntStatus_RcvUrg7_MSB 0x27 +#define QIB_7322_IntStatus_RcvUrg7_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg6_LSB 0x26 +#define QIB_7322_IntStatus_RcvUrg6_MSB 0x26 +#define QIB_7322_IntStatus_RcvUrg6_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg5_LSB 0x25 +#define QIB_7322_IntStatus_RcvUrg5_MSB 0x25 +#define QIB_7322_IntStatus_RcvUrg5_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg4_LSB 0x24 +#define QIB_7322_IntStatus_RcvUrg4_MSB 0x24 +#define QIB_7322_IntStatus_RcvUrg4_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg3_LSB 0x23 +#define QIB_7322_IntStatus_RcvUrg3_MSB 0x23 +#define QIB_7322_IntStatus_RcvUrg3_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg2_LSB 0x22 +#define QIB_7322_IntStatus_RcvUrg2_MSB 0x22 +#define QIB_7322_IntStatus_RcvUrg2_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg1_LSB 0x21 +#define QIB_7322_IntStatus_RcvUrg1_MSB 0x21 +#define QIB_7322_IntStatus_RcvUrg1_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg0_LSB 0x20 +#define QIB_7322_IntStatus_RcvUrg0_MSB 0x20 +#define QIB_7322_IntStatus_RcvUrg0_RMASK 0x1 +#define QIB_7322_IntStatus_Err_1_LSB 0x1F +#define QIB_7322_IntStatus_Err_1_MSB 0x1F +#define QIB_7322_IntStatus_Err_1_RMASK 0x1 +#define QIB_7322_IntStatus_Err_0_LSB 0x1E +#define QIB_7322_IntStatus_Err_0_MSB 0x1E +#define QIB_7322_IntStatus_Err_0_RMASK 0x1 +#define QIB_7322_IntStatus_Err_LSB 0x1D +#define QIB_7322_IntStatus_Err_MSB 0x1D +#define QIB_7322_IntStatus_Err_RMASK 0x1 +#define QIB_7322_IntStatus_AssertGPIO_LSB 0x1C +#define QIB_7322_IntStatus_AssertGPIO_MSB 0x1C +#define QIB_7322_IntStatus_AssertGPIO_RMASK 0x1 +#define QIB_7322_IntStatus_SendDone_1_LSB 0x19 +#define QIB_7322_IntStatus_SendDone_1_MSB 0x19 +#define QIB_7322_IntStatus_SendDone_1_RMASK 0x1 +#define QIB_7322_IntStatus_SendDone_0_LSB 0x18 +#define QIB_7322_IntStatus_SendDone_0_MSB 0x18 +#define QIB_7322_IntStatus_SendDone_0_RMASK 0x1 +#define QIB_7322_IntStatus_SendBufAvail_LSB 0x17 +#define QIB_7322_IntStatus_SendBufAvail_MSB 0x17 +#define QIB_7322_IntStatus_SendBufAvail_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail17_LSB 0x11 +#define QIB_7322_IntStatus_RcvAvail17_MSB 0x11 +#define QIB_7322_IntStatus_RcvAvail17_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail16_LSB 0x10 +#define QIB_7322_IntStatus_RcvAvail16_MSB 0x10 +#define QIB_7322_IntStatus_RcvAvail16_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail15_LSB 0xF +#define QIB_7322_IntStatus_RcvAvail15_MSB 0xF +#define QIB_7322_IntStatus_RcvAvail15_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail14_LSB 0xE +#define QIB_7322_IntStatus_RcvAvail14_MSB 0xE +#define QIB_7322_IntStatus_RcvAvail14_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail13_LSB 0xD +#define QIB_7322_IntStatus_RcvAvail13_MSB 0xD +#define QIB_7322_IntStatus_RcvAvail13_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail12_LSB 0xC +#define QIB_7322_IntStatus_RcvAvail12_MSB 0xC +#define QIB_7322_IntStatus_RcvAvail12_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail11_LSB 0xB +#define QIB_7322_IntStatus_RcvAvail11_MSB 0xB +#define QIB_7322_IntStatus_RcvAvail11_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail10_LSB 0xA +#define QIB_7322_IntStatus_RcvAvail10_MSB 0xA +#define QIB_7322_IntStatus_RcvAvail10_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail9_LSB 0x9 +#define QIB_7322_IntStatus_RcvAvail9_MSB 0x9 +#define QIB_7322_IntStatus_RcvAvail9_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail8_LSB 0x8 +#define QIB_7322_IntStatus_RcvAvail8_MSB 0x8 +#define QIB_7322_IntStatus_RcvAvail8_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail7_LSB 0x7 +#define QIB_7322_IntStatus_RcvAvail7_MSB 0x7 +#define QIB_7322_IntStatus_RcvAvail7_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail6_LSB 0x6 +#define QIB_7322_IntStatus_RcvAvail6_MSB 0x6 +#define QIB_7322_IntStatus_RcvAvail6_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail5_LSB 0x5 +#define QIB_7322_IntStatus_RcvAvail5_MSB 0x5 +#define QIB_7322_IntStatus_RcvAvail5_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail4_LSB 0x4 +#define QIB_7322_IntStatus_RcvAvail4_MSB 0x4 +#define QIB_7322_IntStatus_RcvAvail4_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail3_LSB 0x3 +#define QIB_7322_IntStatus_RcvAvail3_MSB 0x3 +#define QIB_7322_IntStatus_RcvAvail3_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail2_LSB 0x2 +#define QIB_7322_IntStatus_RcvAvail2_MSB 0x2 +#define QIB_7322_IntStatus_RcvAvail2_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail1_LSB 0x1 +#define QIB_7322_IntStatus_RcvAvail1_MSB 0x1 +#define QIB_7322_IntStatus_RcvAvail1_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail0_LSB 0x0 +#define QIB_7322_IntStatus_RcvAvail0_MSB 0x0 +#define QIB_7322_IntStatus_RcvAvail0_RMASK 0x1 + +#define QIB_7322_IntClear_OFFS 0x78 +#define QIB_7322_IntClear_DEF 0x0000000000000000 +#define QIB_7322_IntClear_SDmaIntClear_1_LSB 0x3F +#define QIB_7322_IntClear_SDmaIntClear_1_MSB 0x3F +#define QIB_7322_IntClear_SDmaIntClear_1_RMASK 0x1 +#define QIB_7322_IntClear_SDmaIntClear_0_LSB 0x3E +#define QIB_7322_IntClear_SDmaIntClear_0_MSB 0x3E +#define QIB_7322_IntClear_SDmaIntClear_0_RMASK 0x1 +#define QIB_7322_IntClear_SDmaProgressIntClear_1_LSB 0x3D +#define QIB_7322_IntClear_SDmaProgressIntClear_1_MSB 0x3D +#define QIB_7322_IntClear_SDmaProgressIntClear_1_RMASK 0x1 +#define QIB_7322_IntClear_SDmaProgressIntClear_0_LSB 0x3C +#define QIB_7322_IntClear_SDmaProgressIntClear_0_MSB 0x3C +#define QIB_7322_IntClear_SDmaProgressIntClear_0_RMASK 0x1 +#define QIB_7322_IntClear_SDmaIdleIntClear_1_LSB 0x3B +#define QIB_7322_IntClear_SDmaIdleIntClear_1_MSB 0x3B +#define QIB_7322_IntClear_SDmaIdleIntClear_1_RMASK 0x1 +#define QIB_7322_IntClear_SDmaIdleIntClear_0_LSB 0x3A +#define QIB_7322_IntClear_SDmaIdleIntClear_0_MSB 0x3A +#define QIB_7322_IntClear_SDmaIdleIntClear_0_RMASK 0x1 +#define QIB_7322_IntClear_SDmaCleanupDoneClear_1_LSB 0x39 +#define QIB_7322_IntClear_SDmaCleanupDoneClear_1_MSB 0x39 +#define QIB_7322_IntClear_SDmaCleanupDoneClear_1_RMASK 0x1 +#define QIB_7322_IntClear_SDmaCleanupDoneClear_0_LSB 0x38 +#define QIB_7322_IntClear_SDmaCleanupDoneClear_0_MSB 0x38 +#define QIB_7322_IntClear_SDmaCleanupDoneClear_0_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg17IntClear_LSB 0x31 +#define QIB_7322_IntClear_RcvUrg17IntClear_MSB 0x31 +#define QIB_7322_IntClear_RcvUrg17IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg16IntClear_LSB 0x30 +#define QIB_7322_IntClear_RcvUrg16IntClear_MSB 0x30 +#define QIB_7322_IntClear_RcvUrg16IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg15IntClear_LSB 0x2F +#define QIB_7322_IntClear_RcvUrg15IntClear_MSB 0x2F +#define QIB_7322_IntClear_RcvUrg15IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg14IntClear_LSB 0x2E +#define QIB_7322_IntClear_RcvUrg14IntClear_MSB 0x2E +#define QIB_7322_IntClear_RcvUrg14IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg13IntClear_LSB 0x2D +#define QIB_7322_IntClear_RcvUrg13IntClear_MSB 0x2D +#define QIB_7322_IntClear_RcvUrg13IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg12IntClear_LSB 0x2C +#define QIB_7322_IntClear_RcvUrg12IntClear_MSB 0x2C +#define QIB_7322_IntClear_RcvUrg12IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg11IntClear_LSB 0x2B +#define QIB_7322_IntClear_RcvUrg11IntClear_MSB 0x2B +#define QIB_7322_IntClear_RcvUrg11IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg10IntClear_LSB 0x2A +#define QIB_7322_IntClear_RcvUrg10IntClear_MSB 0x2A +#define QIB_7322_IntClear_RcvUrg10IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg9IntClear_LSB 0x29 +#define QIB_7322_IntClear_RcvUrg9IntClear_MSB 0x29 +#define QIB_7322_IntClear_RcvUrg9IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg8IntClear_LSB 0x28 +#define QIB_7322_IntClear_RcvUrg8IntClear_MSB 0x28 +#define QIB_7322_IntClear_RcvUrg8IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg7IntClear_LSB 0x27 +#define QIB_7322_IntClear_RcvUrg7IntClear_MSB 0x27 +#define QIB_7322_IntClear_RcvUrg7IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg6IntClear_LSB 0x26 +#define QIB_7322_IntClear_RcvUrg6IntClear_MSB 0x26 +#define QIB_7322_IntClear_RcvUrg6IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg5IntClear_LSB 0x25 +#define QIB_7322_IntClear_RcvUrg5IntClear_MSB 0x25 +#define QIB_7322_IntClear_RcvUrg5IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg4IntClear_LSB 0x24 +#define QIB_7322_IntClear_RcvUrg4IntClear_MSB 0x24 +#define QIB_7322_IntClear_RcvUrg4IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg3IntClear_LSB 0x23 +#define QIB_7322_IntClear_RcvUrg3IntClear_MSB 0x23 +#define QIB_7322_IntClear_RcvUrg3IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg2IntClear_LSB 0x22 +#define QIB_7322_IntClear_RcvUrg2IntClear_MSB 0x22 +#define QIB_7322_IntClear_RcvUrg2IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg1IntClear_LSB 0x21 +#define QIB_7322_IntClear_RcvUrg1IntClear_MSB 0x21 +#define QIB_7322_IntClear_RcvUrg1IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg0IntClear_LSB 0x20 +#define QIB_7322_IntClear_RcvUrg0IntClear_MSB 0x20 +#define QIB_7322_IntClear_RcvUrg0IntClear_RMASK 0x1 +#define QIB_7322_IntClear_ErrIntClear_1_LSB 0x1F +#define QIB_7322_IntClear_ErrIntClear_1_MSB 0x1F +#define QIB_7322_IntClear_ErrIntClear_1_RMASK 0x1 +#define QIB_7322_IntClear_ErrIntClear_0_LSB 0x1E +#define QIB_7322_IntClear_ErrIntClear_0_MSB 0x1E +#define QIB_7322_IntClear_ErrIntClear_0_RMASK 0x1 +#define QIB_7322_IntClear_ErrIntClear_LSB 0x1D +#define QIB_7322_IntClear_ErrIntClear_MSB 0x1D +#define QIB_7322_IntClear_ErrIntClear_RMASK 0x1 +#define QIB_7322_IntClear_AssertGPIOIntClear_LSB 0x1C +#define QIB_7322_IntClear_AssertGPIOIntClear_MSB 0x1C +#define QIB_7322_IntClear_AssertGPIOIntClear_RMASK 0x1 +#define QIB_7322_IntClear_SendDoneIntClear_1_LSB 0x19 +#define QIB_7322_IntClear_SendDoneIntClear_1_MSB 0x19 +#define QIB_7322_IntClear_SendDoneIntClear_1_RMASK 0x1 +#define QIB_7322_IntClear_SendDoneIntClear_0_LSB 0x18 +#define QIB_7322_IntClear_SendDoneIntClear_0_MSB 0x18 +#define QIB_7322_IntClear_SendDoneIntClear_0_RMASK 0x1 +#define QIB_7322_IntClear_SendBufAvailIntClear_LSB 0x17 +#define QIB_7322_IntClear_SendBufAvailIntClear_MSB 0x17 +#define QIB_7322_IntClear_SendBufAvailIntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail17IntClear_LSB 0x11 +#define QIB_7322_IntClear_RcvAvail17IntClear_MSB 0x11 +#define QIB_7322_IntClear_RcvAvail17IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail16IntClear_LSB 0x10 +#define QIB_7322_IntClear_RcvAvail16IntClear_MSB 0x10 +#define QIB_7322_IntClear_RcvAvail16IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail15IntClear_LSB 0xF +#define QIB_7322_IntClear_RcvAvail15IntClear_MSB 0xF +#define QIB_7322_IntClear_RcvAvail15IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail14IntClear_LSB 0xE +#define QIB_7322_IntClear_RcvAvail14IntClear_MSB 0xE +#define QIB_7322_IntClear_RcvAvail14IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail13IntClear_LSB 0xD +#define QIB_7322_IntClear_RcvAvail13IntClear_MSB 0xD +#define QIB_7322_IntClear_RcvAvail13IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail12IntClear_LSB 0xC +#define QIB_7322_IntClear_RcvAvail12IntClear_MSB 0xC +#define QIB_7322_IntClear_RcvAvail12IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail11IntClear_LSB 0xB +#define QIB_7322_IntClear_RcvAvail11IntClear_MSB 0xB +#define QIB_7322_IntClear_RcvAvail11IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail10IntClear_LSB 0xA +#define QIB_7322_IntClear_RcvAvail10IntClear_MSB 0xA +#define QIB_7322_IntClear_RcvAvail10IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail9IntClear_LSB 0x9 +#define QIB_7322_IntClear_RcvAvail9IntClear_MSB 0x9 +#define QIB_7322_IntClear_RcvAvail9IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail8IntClear_LSB 0x8 +#define QIB_7322_IntClear_RcvAvail8IntClear_MSB 0x8 +#define QIB_7322_IntClear_RcvAvail8IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail7IntClear_LSB 0x7 +#define QIB_7322_IntClear_RcvAvail7IntClear_MSB 0x7 +#define QIB_7322_IntClear_RcvAvail7IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail6IntClear_LSB 0x6 +#define QIB_7322_IntClear_RcvAvail6IntClear_MSB 0x6 +#define QIB_7322_IntClear_RcvAvail6IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail5IntClear_LSB 0x5 +#define QIB_7322_IntClear_RcvAvail5IntClear_MSB 0x5 +#define QIB_7322_IntClear_RcvAvail5IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail4IntClear_LSB 0x4 +#define QIB_7322_IntClear_RcvAvail4IntClear_MSB 0x4 +#define QIB_7322_IntClear_RcvAvail4IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail3IntClear_LSB 0x3 +#define QIB_7322_IntClear_RcvAvail3IntClear_MSB 0x3 +#define QIB_7322_IntClear_RcvAvail3IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail2IntClear_LSB 0x2 +#define QIB_7322_IntClear_RcvAvail2IntClear_MSB 0x2 +#define QIB_7322_IntClear_RcvAvail2IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail1IntClear_LSB 0x1 +#define QIB_7322_IntClear_RcvAvail1IntClear_MSB 0x1 +#define QIB_7322_IntClear_RcvAvail1IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail0IntClear_LSB 0x0 +#define QIB_7322_IntClear_RcvAvail0IntClear_MSB 0x0 +#define QIB_7322_IntClear_RcvAvail0IntClear_RMASK 0x1 + +#define QIB_7322_ErrMask_OFFS 0x80 +#define QIB_7322_ErrMask_DEF 0x0000000000000000 +#define QIB_7322_ErrMask_ResetNegatedMask_LSB 0x3F +#define QIB_7322_ErrMask_ResetNegatedMask_MSB 0x3F +#define QIB_7322_ErrMask_ResetNegatedMask_RMASK 0x1 +#define QIB_7322_ErrMask_HardwareErrMask_LSB 0x3E +#define QIB_7322_ErrMask_HardwareErrMask_MSB 0x3E +#define QIB_7322_ErrMask_HardwareErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_InvalidAddrErrMask_LSB 0x3D +#define QIB_7322_ErrMask_InvalidAddrErrMask_MSB 0x3D +#define QIB_7322_ErrMask_InvalidAddrErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_SDmaVL15ErrMask_LSB 0x38 +#define QIB_7322_ErrMask_SDmaVL15ErrMask_MSB 0x38 +#define QIB_7322_ErrMask_SDmaVL15ErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_SBufVL15MisUseErrMask_LSB 0x37 +#define QIB_7322_ErrMask_SBufVL15MisUseErrMask_MSB 0x37 +#define QIB_7322_ErrMask_SBufVL15MisUseErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_InvalidEEPCmdMask_LSB 0x35 +#define QIB_7322_ErrMask_InvalidEEPCmdMask_MSB 0x35 +#define QIB_7322_ErrMask_InvalidEEPCmdMask_RMASK 0x1 +#define QIB_7322_ErrMask_RcvContextShareErrMask_LSB 0x34 +#define QIB_7322_ErrMask_RcvContextShareErrMask_MSB 0x34 +#define QIB_7322_ErrMask_RcvContextShareErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_SendVLMismatchErrMask_LSB 0x24 +#define QIB_7322_ErrMask_SendVLMismatchErrMask_MSB 0x24 +#define QIB_7322_ErrMask_SendVLMismatchErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_SendArmLaunchErrMask_LSB 0x23 +#define QIB_7322_ErrMask_SendArmLaunchErrMask_MSB 0x23 +#define QIB_7322_ErrMask_SendArmLaunchErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_SendSpecialTriggerErrMask_LSB 0x1B +#define QIB_7322_ErrMask_SendSpecialTriggerErrMask_MSB 0x1B +#define QIB_7322_ErrMask_SendSpecialTriggerErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_SDmaWrongPortErrMask_LSB 0x1A +#define QIB_7322_ErrMask_SDmaWrongPortErrMask_MSB 0x1A +#define QIB_7322_ErrMask_SDmaWrongPortErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_SDmaBufMaskDuplicateErrMask_LSB 0x19 +#define QIB_7322_ErrMask_SDmaBufMaskDuplicateErrMask_MSB 0x19 +#define QIB_7322_ErrMask_SDmaBufMaskDuplicateErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_RcvHdrFullErrMask_LSB 0xD +#define QIB_7322_ErrMask_RcvHdrFullErrMask_MSB 0xD +#define QIB_7322_ErrMask_RcvHdrFullErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_RcvEgrFullErrMask_LSB 0xC +#define QIB_7322_ErrMask_RcvEgrFullErrMask_MSB 0xC +#define QIB_7322_ErrMask_RcvEgrFullErrMask_RMASK 0x1 + +#define QIB_7322_ErrStatus_OFFS 0x88 +#define QIB_7322_ErrStatus_DEF 0x0000000000000000 +#define QIB_7322_ErrStatus_ResetNegated_LSB 0x3F +#define QIB_7322_ErrStatus_ResetNegated_MSB 0x3F +#define QIB_7322_ErrStatus_ResetNegated_RMASK 0x1 +#define QIB_7322_ErrStatus_HardwareErr_LSB 0x3E +#define QIB_7322_ErrStatus_HardwareErr_MSB 0x3E +#define QIB_7322_ErrStatus_HardwareErr_RMASK 0x1 +#define QIB_7322_ErrStatus_InvalidAddrErr_LSB 0x3D +#define QIB_7322_ErrStatus_InvalidAddrErr_MSB 0x3D +#define QIB_7322_ErrStatus_InvalidAddrErr_RMASK 0x1 +#define QIB_7322_ErrStatus_SDmaVL15Err_LSB 0x38 +#define QIB_7322_ErrStatus_SDmaVL15Err_MSB 0x38 +#define QIB_7322_ErrStatus_SDmaVL15Err_RMASK 0x1 +#define QIB_7322_ErrStatus_SBufVL15MisUseErr_LSB 0x37 +#define QIB_7322_ErrStatus_SBufVL15MisUseErr_MSB 0x37 +#define QIB_7322_ErrStatus_SBufVL15MisUseErr_RMASK 0x1 +#define QIB_7322_ErrStatus_InvalidEEPCmdErr_LSB 0x35 +#define QIB_7322_ErrStatus_InvalidEEPCmdErr_MSB 0x35 +#define QIB_7322_ErrStatus_InvalidEEPCmdErr_RMASK 0x1 +#define QIB_7322_ErrStatus_RcvContextShareErr_LSB 0x34 +#define QIB_7322_ErrStatus_RcvContextShareErr_MSB 0x34 +#define QIB_7322_ErrStatus_RcvContextShareErr_RMASK 0x1 +#define QIB_7322_ErrStatus_SendVLMismatchErr_LSB 0x24 +#define QIB_7322_ErrStatus_SendVLMismatchErr_MSB 0x24 +#define QIB_7322_ErrStatus_SendVLMismatchErr_RMASK 0x1 +#define QIB_7322_ErrStatus_SendArmLaunchErr_LSB 0x23 +#define QIB_7322_ErrStatus_SendArmLaunchErr_MSB 0x23 +#define QIB_7322_ErrStatus_SendArmLaunchErr_RMASK 0x1 +#define QIB_7322_ErrStatus_SendSpecialTriggerErr_LSB 0x1B +#define QIB_7322_ErrStatus_SendSpecialTriggerErr_MSB 0x1B +#define QIB_7322_ErrStatus_SendSpecialTriggerErr_RMASK 0x1 +#define QIB_7322_ErrStatus_SDmaWrongPortErr_LSB 0x1A +#define QIB_7322_ErrStatus_SDmaWrongPortErr_MSB 0x1A +#define QIB_7322_ErrStatus_SDmaWrongPortErr_RMASK 0x1 +#define QIB_7322_ErrStatus_SDmaBufMaskDuplicateErr_LSB 0x19 +#define QIB_7322_ErrStatus_SDmaBufMaskDuplicateErr_MSB 0x19 +#define QIB_7322_ErrStatus_SDmaBufMaskDuplicateErr_RMASK 0x1 +#define QIB_7322_ErrStatus_RcvHdrFullErr_LSB 0xD +#define QIB_7322_ErrStatus_RcvHdrFullErr_MSB 0xD +#define QIB_7322_ErrStatus_RcvHdrFullErr_RMASK 0x1 +#define QIB_7322_ErrStatus_RcvEgrFullErr_LSB 0xC +#define QIB_7322_ErrStatus_RcvEgrFullErr_MSB 0xC +#define QIB_7322_ErrStatus_RcvEgrFullErr_RMASK 0x1 + +#define QIB_7322_ErrClear_OFFS 0x90 +#define QIB_7322_ErrClear_DEF 0x0000000000000000 +#define QIB_7322_ErrClear_ResetNegatedClear_LSB 0x3F +#define QIB_7322_ErrClear_ResetNegatedClear_MSB 0x3F +#define QIB_7322_ErrClear_ResetNegatedClear_RMASK 0x1 +#define QIB_7322_ErrClear_HardwareErrClear_LSB 0x3E +#define QIB_7322_ErrClear_HardwareErrClear_MSB 0x3E +#define QIB_7322_ErrClear_HardwareErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_InvalidAddrErrClear_LSB 0x3D +#define QIB_7322_ErrClear_InvalidAddrErrClear_MSB 0x3D +#define QIB_7322_ErrClear_InvalidAddrErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_SDmaVL15ErrClear_LSB 0x38 +#define QIB_7322_ErrClear_SDmaVL15ErrClear_MSB 0x38 +#define QIB_7322_ErrClear_SDmaVL15ErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_SBufVL15MisUseErrClear_LSB 0x37 +#define QIB_7322_ErrClear_SBufVL15MisUseErrClear_MSB 0x37 +#define QIB_7322_ErrClear_SBufVL15MisUseErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_InvalidEEPCmdErrClear_LSB 0x35 +#define QIB_7322_ErrClear_InvalidEEPCmdErrClear_MSB 0x35 +#define QIB_7322_ErrClear_InvalidEEPCmdErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_RcvContextShareErrClear_LSB 0x34 +#define QIB_7322_ErrClear_RcvContextShareErrClear_MSB 0x34 +#define QIB_7322_ErrClear_RcvContextShareErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_SendVLMismatchErrMask_LSB 0x24 +#define QIB_7322_ErrClear_SendVLMismatchErrMask_MSB 0x24 +#define QIB_7322_ErrClear_SendVLMismatchErrMask_RMASK 0x1 +#define QIB_7322_ErrClear_SendArmLaunchErrClear_LSB 0x23 +#define QIB_7322_ErrClear_SendArmLaunchErrClear_MSB 0x23 +#define QIB_7322_ErrClear_SendArmLaunchErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_SendSpecialTriggerErrClear_LSB 0x1B +#define QIB_7322_ErrClear_SendSpecialTriggerErrClear_MSB 0x1B +#define QIB_7322_ErrClear_SendSpecialTriggerErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_SDmaWrongPortErrClear_LSB 0x1A +#define QIB_7322_ErrClear_SDmaWrongPortErrClear_MSB 0x1A +#define QIB_7322_ErrClear_SDmaWrongPortErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_SDmaBufMaskDuplicateErrClear_LSB 0x19 +#define QIB_7322_ErrClear_SDmaBufMaskDuplicateErrClear_MSB 0x19 +#define QIB_7322_ErrClear_SDmaBufMaskDuplicateErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_RcvHdrFullErrClear_LSB 0xD +#define QIB_7322_ErrClear_RcvHdrFullErrClear_MSB 0xD +#define QIB_7322_ErrClear_RcvHdrFullErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_RcvEgrFullErrClear_LSB 0xC +#define QIB_7322_ErrClear_RcvEgrFullErrClear_MSB 0xC +#define QIB_7322_ErrClear_RcvEgrFullErrClear_RMASK 0x1 + +#define QIB_7322_HwErrMask_OFFS 0x98 +#define QIB_7322_HwErrMask_DEF 0x0000000000000000 +#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_1_LSB 0x3F +#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_1_MSB 0x3F +#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_1_RMASK 0x1 +#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_0_LSB 0x3E +#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_0_MSB 0x3E +#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_0_RMASK 0x1 +#define QIB_7322_HwErrMask_PCIESerdesPClkNotDetectMask_LSB 0x37 +#define QIB_7322_HwErrMask_PCIESerdesPClkNotDetectMask_MSB 0x37 +#define QIB_7322_HwErrMask_PCIESerdesPClkNotDetectMask_RMASK 0x1 +#define QIB_7322_HwErrMask_PowerOnBISTFailedMask_LSB 0x36 +#define QIB_7322_HwErrMask_PowerOnBISTFailedMask_MSB 0x36 +#define QIB_7322_HwErrMask_PowerOnBISTFailedMask_RMASK 0x1 +#define QIB_7322_HwErrMask_TempsenseTholdReachedMask_LSB 0x35 +#define QIB_7322_HwErrMask_TempsenseTholdReachedMask_MSB 0x35 +#define QIB_7322_HwErrMask_TempsenseTholdReachedMask_RMASK 0x1 +#define QIB_7322_HwErrMask_MemoryErrMask_LSB 0x30 +#define QIB_7322_HwErrMask_MemoryErrMask_MSB 0x30 +#define QIB_7322_HwErrMask_MemoryErrMask_RMASK 0x1 +#define QIB_7322_HwErrMask_pcie_phy_txParityErr_LSB 0x22 +#define QIB_7322_HwErrMask_pcie_phy_txParityErr_MSB 0x22 +#define QIB_7322_HwErrMask_pcie_phy_txParityErr_RMASK 0x1 +#define QIB_7322_HwErrMask_PCIeBusParityErrMask_LSB 0x1F +#define QIB_7322_HwErrMask_PCIeBusParityErrMask_MSB 0x21 +#define QIB_7322_HwErrMask_PCIeBusParityErrMask_RMASK 0x7 +#define QIB_7322_HwErrMask_PcieCplTimeoutMask_LSB 0x1E +#define QIB_7322_HwErrMask_PcieCplTimeoutMask_MSB 0x1E +#define QIB_7322_HwErrMask_PcieCplTimeoutMask_RMASK 0x1 +#define QIB_7322_HwErrMask_PciePoisonedTLPMask_LSB 0x1D +#define QIB_7322_HwErrMask_PciePoisonedTLPMask_MSB 0x1D +#define QIB_7322_HwErrMask_PciePoisonedTLPMask_RMASK 0x1 +#define QIB_7322_HwErrMask_SDmaMemReadErrMask_1_LSB 0x1C +#define QIB_7322_HwErrMask_SDmaMemReadErrMask_1_MSB 0x1C +#define QIB_7322_HwErrMask_SDmaMemReadErrMask_1_RMASK 0x1 +#define QIB_7322_HwErrMask_SDmaMemReadErrMask_0_LSB 0x1B +#define QIB_7322_HwErrMask_SDmaMemReadErrMask_0_MSB 0x1B +#define QIB_7322_HwErrMask_SDmaMemReadErrMask_0_RMASK 0x1 +#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_1_LSB 0xF +#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_1_MSB 0xF +#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_1_RMASK 0x1 +#define QIB_7322_HwErrMask_IBCBusToSPCParityErrMask_1_LSB 0xE +#define QIB_7322_HwErrMask_IBCBusToSPCParityErrMask_1_MSB 0xE +#define QIB_7322_HwErrMask_IBCBusToSPCParityErrMask_1_RMASK 0x1 +#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_0_LSB 0xD +#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_0_MSB 0xD +#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_0_RMASK 0x1 +#define QIB_7322_HwErrMask_statusValidNoEopMask_LSB 0xC +#define QIB_7322_HwErrMask_statusValidNoEopMask_MSB 0xC +#define QIB_7322_HwErrMask_statusValidNoEopMask_RMASK 0x1 +#define QIB_7322_HwErrMask_LATriggeredMask_LSB 0xB +#define QIB_7322_HwErrMask_LATriggeredMask_MSB 0xB +#define QIB_7322_HwErrMask_LATriggeredMask_RMASK 0x1 + +#define QIB_7322_HwErrStatus_OFFS 0xA0 +#define QIB_7322_HwErrStatus_DEF 0x0000000000000000 +#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_1_LSB 0x3F +#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_1_MSB 0x3F +#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_1_RMASK 0x1 +#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_0_LSB 0x3E +#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_0_MSB 0x3E +#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_0_RMASK 0x1 +#define QIB_7322_HwErrStatus_PCIESerdesPClkNotDetect_LSB 0x37 +#define QIB_7322_HwErrStatus_PCIESerdesPClkNotDetect_MSB 0x37 +#define QIB_7322_HwErrStatus_PCIESerdesPClkNotDetect_RMASK 0x1 +#define QIB_7322_HwErrStatus_PowerOnBISTFailed_LSB 0x36 +#define QIB_7322_HwErrStatus_PowerOnBISTFailed_MSB 0x36 +#define QIB_7322_HwErrStatus_PowerOnBISTFailed_RMASK 0x1 +#define QIB_7322_HwErrStatus_TempsenseTholdReached_LSB 0x35 +#define QIB_7322_HwErrStatus_TempsenseTholdReached_MSB 0x35 +#define QIB_7322_HwErrStatus_TempsenseTholdReached_RMASK 0x1 +#define QIB_7322_HwErrStatus_MemoryErr_LSB 0x30 +#define QIB_7322_HwErrStatus_MemoryErr_MSB 0x30 +#define QIB_7322_HwErrStatus_MemoryErr_RMASK 0x1 +#define QIB_7322_HwErrStatus_pcie_phy_txParityErr_LSB 0x22 +#define QIB_7322_HwErrStatus_pcie_phy_txParityErr_MSB 0x22 +#define QIB_7322_HwErrStatus_pcie_phy_txParityErr_RMASK 0x1 +#define QIB_7322_HwErrStatus_PCIeBusParity_LSB 0x1F +#define QIB_7322_HwErrStatus_PCIeBusParity_MSB 0x21 +#define QIB_7322_HwErrStatus_PCIeBusParity_RMASK 0x7 +#define QIB_7322_HwErrStatus_PcieCplTimeout_LSB 0x1E +#define QIB_7322_HwErrStatus_PcieCplTimeout_MSB 0x1E +#define QIB_7322_HwErrStatus_PcieCplTimeout_RMASK 0x1 +#define QIB_7322_HwErrStatus_PciePoisonedTLP_LSB 0x1D +#define QIB_7322_HwErrStatus_PciePoisonedTLP_MSB 0x1D +#define QIB_7322_HwErrStatus_PciePoisonedTLP_RMASK 0x1 +#define QIB_7322_HwErrStatus_SDmaMemReadErr_1_LSB 0x1C +#define QIB_7322_HwErrStatus_SDmaMemReadErr_1_MSB 0x1C +#define QIB_7322_HwErrStatus_SDmaMemReadErr_1_RMASK 0x1 +#define QIB_7322_HwErrStatus_SDmaMemReadErr_0_LSB 0x1B +#define QIB_7322_HwErrStatus_SDmaMemReadErr_0_MSB 0x1B +#define QIB_7322_HwErrStatus_SDmaMemReadErr_0_RMASK 0x1 +#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_1_LSB 0xF +#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_1_MSB 0xF +#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_1_RMASK 0x1 +#define QIB_7322_HwErrStatus_IBCBusToSPCParityErr_1_LSB 0xE +#define QIB_7322_HwErrStatus_IBCBusToSPCParityErr_1_MSB 0xE +#define QIB_7322_HwErrStatus_IBCBusToSPCParityErr_1_RMASK 0x1 +#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_0_LSB 0xD +#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_0_MSB 0xD +#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_0_RMASK 0x1 +#define QIB_7322_HwErrStatus_statusValidNoEop_LSB 0xC +#define QIB_7322_HwErrStatus_statusValidNoEop_MSB 0xC +#define QIB_7322_HwErrStatus_statusValidNoEop_RMASK 0x1 +#define QIB_7322_HwErrStatus_LATriggered_LSB 0xB +#define QIB_7322_HwErrStatus_LATriggered_MSB 0xB +#define QIB_7322_HwErrStatus_LATriggered_RMASK 0x1 + +#define QIB_7322_HwErrClear_OFFS 0xA8 +#define QIB_7322_HwErrClear_DEF 0x0000000000000000 +#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_1_LSB 0x3F +#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_1_MSB 0x3F +#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_1_RMASK 0x1 +#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_0_LSB 0x3E +#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_0_MSB 0x3E +#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_0_RMASK 0x1 +#define QIB_7322_HwErrClear_PCIESerdesPClkNotDetectClear_LSB 0x37 +#define QIB_7322_HwErrClear_PCIESerdesPClkNotDetectClear_MSB 0x37 +#define QIB_7322_HwErrClear_PCIESerdesPClkNotDetectClear_RMASK 0x1 +#define QIB_7322_HwErrClear_PowerOnBISTFailedClear_LSB 0x36 +#define QIB_7322_HwErrClear_PowerOnBISTFailedClear_MSB 0x36 +#define QIB_7322_HwErrClear_PowerOnBISTFailedClear_RMASK 0x1 +#define QIB_7322_HwErrClear_TempsenseTholdReachedClear_LSB 0x35 +#define QIB_7322_HwErrClear_TempsenseTholdReachedClear_MSB 0x35 +#define QIB_7322_HwErrClear_TempsenseTholdReachedClear_RMASK 0x1 +#define QIB_7322_HwErrClear_MemoryErrClear_LSB 0x30 +#define QIB_7322_HwErrClear_MemoryErrClear_MSB 0x30 +#define QIB_7322_HwErrClear_MemoryErrClear_RMASK 0x1 +#define QIB_7322_HwErrClear_pcie_phy_txParityErr_LSB 0x22 +#define QIB_7322_HwErrClear_pcie_phy_txParityErr_MSB 0x22 +#define QIB_7322_HwErrClear_pcie_phy_txParityErr_RMASK 0x1 +#define QIB_7322_HwErrClear_PCIeBusParityClear_LSB 0x1F +#define QIB_7322_HwErrClear_PCIeBusParityClear_MSB 0x21 +#define QIB_7322_HwErrClear_PCIeBusParityClear_RMASK 0x7 +#define QIB_7322_HwErrClear_PcieCplTimeoutClear_LSB 0x1E +#define QIB_7322_HwErrClear_PcieCplTimeoutClear_MSB 0x1E +#define QIB_7322_HwErrClear_PcieCplTimeoutClear_RMASK 0x1 +#define QIB_7322_HwErrClear_PciePoisonedTLPClear_LSB 0x1D +#define QIB_7322_HwErrClear_PciePoisonedTLPClear_MSB 0x1D +#define QIB_7322_HwErrClear_PciePoisonedTLPClear_RMASK 0x1 +#define QIB_7322_HwErrClear_SDmaMemReadErrClear_1_LSB 0x1C +#define QIB_7322_HwErrClear_SDmaMemReadErrClear_1_MSB 0x1C +#define QIB_7322_HwErrClear_SDmaMemReadErrClear_1_RMASK 0x1 +#define QIB_7322_HwErrClear_SDmaMemReadErrClear_0_LSB 0x1B +#define QIB_7322_HwErrClear_SDmaMemReadErrClear_0_MSB 0x1B +#define QIB_7322_HwErrClear_SDmaMemReadErrClear_0_RMASK 0x1 +#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_1_LSB 0xF +#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_1_MSB 0xF +#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_1_RMASK 0x1 +#define QIB_7322_HwErrClear_IBCBusToSPCParityErrClear_1_LSB 0xE +#define QIB_7322_HwErrClear_IBCBusToSPCParityErrClear_1_MSB 0xE +#define QIB_7322_HwErrClear_IBCBusToSPCParityErrClear_1_RMASK 0x1 +#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_0_LSB 0xD +#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_0_MSB 0xD +#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_0_RMASK 0x1 +#define QIB_7322_HwErrClear_statusValidNoEopClear_LSB 0xC +#define QIB_7322_HwErrClear_statusValidNoEopClear_MSB 0xC +#define QIB_7322_HwErrClear_statusValidNoEopClear_RMASK 0x1 +#define QIB_7322_HwErrClear_LATriggeredClear_LSB 0xB +#define QIB_7322_HwErrClear_LATriggeredClear_MSB 0xB +#define QIB_7322_HwErrClear_LATriggeredClear_RMASK 0x1 + +#define QIB_7322_HwDiagCtrl_OFFS 0xB0 +#define QIB_7322_HwDiagCtrl_DEF 0x0000000000000000 +#define QIB_7322_HwDiagCtrl_Diagnostic_LSB 0x3F +#define QIB_7322_HwDiagCtrl_Diagnostic_MSB 0x3F +#define QIB_7322_HwDiagCtrl_Diagnostic_RMASK 0x1 +#define QIB_7322_HwDiagCtrl_CounterWrEnable_LSB 0x3D +#define QIB_7322_HwDiagCtrl_CounterWrEnable_MSB 0x3D +#define QIB_7322_HwDiagCtrl_CounterWrEnable_RMASK 0x1 +#define QIB_7322_HwDiagCtrl_CounterDisable_LSB 0x3C +#define QIB_7322_HwDiagCtrl_CounterDisable_MSB 0x3C +#define QIB_7322_HwDiagCtrl_CounterDisable_RMASK 0x1 +#define QIB_7322_HwDiagCtrl_forcePCIeBusParity_LSB 0x1F +#define QIB_7322_HwDiagCtrl_forcePCIeBusParity_MSB 0x22 +#define QIB_7322_HwDiagCtrl_forcePCIeBusParity_RMASK 0xF +#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_1_LSB 0xF +#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_1_MSB 0xF +#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_1_RMASK 0x1 +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_1_LSB 0xE +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_1_MSB 0xE +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_1_RMASK 0x1 +#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_0_LSB 0xD +#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_0_MSB 0xD +#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_0_RMASK 0x1 +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_0_LSB 0xC +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_0_MSB 0xC +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_0_RMASK 0x1 + +#define QIB_7322_EXTStatus_OFFS 0xC0 +#define QIB_7322_EXTStatus_DEF 0x000000000000X000 +#define QIB_7322_EXTStatus_GPIOIn_LSB 0x30 +#define QIB_7322_EXTStatus_GPIOIn_MSB 0x3F +#define QIB_7322_EXTStatus_GPIOIn_RMASK 0xFFFF +#define QIB_7322_EXTStatus_MemBISTDisabled_LSB 0xF +#define QIB_7322_EXTStatus_MemBISTDisabled_MSB 0xF +#define QIB_7322_EXTStatus_MemBISTDisabled_RMASK 0x1 +#define QIB_7322_EXTStatus_MemBISTEndTest_LSB 0xE +#define QIB_7322_EXTStatus_MemBISTEndTest_MSB 0xE +#define QIB_7322_EXTStatus_MemBISTEndTest_RMASK 0x1 + +#define QIB_7322_EXTCtrl_OFFS 0xC8 +#define QIB_7322_EXTCtrl_DEF 0x0000000000000000 +#define QIB_7322_EXTCtrl_GPIOOe_LSB 0x30 +#define QIB_7322_EXTCtrl_GPIOOe_MSB 0x3F +#define QIB_7322_EXTCtrl_GPIOOe_RMASK 0xFFFF +#define QIB_7322_EXTCtrl_GPIOInvert_LSB 0x20 +#define QIB_7322_EXTCtrl_GPIOInvert_MSB 0x2F +#define QIB_7322_EXTCtrl_GPIOInvert_RMASK 0xFFFF +#define QIB_7322_EXTCtrl_LEDPort1GreenOn_LSB 0x3 +#define QIB_7322_EXTCtrl_LEDPort1GreenOn_MSB 0x3 +#define QIB_7322_EXTCtrl_LEDPort1GreenOn_RMASK 0x1 +#define QIB_7322_EXTCtrl_LEDPort1YellowOn_LSB 0x2 +#define QIB_7322_EXTCtrl_LEDPort1YellowOn_MSB 0x2 +#define QIB_7322_EXTCtrl_LEDPort1YellowOn_RMASK 0x1 +#define QIB_7322_EXTCtrl_LEDPort0GreenOn_LSB 0x1 +#define QIB_7322_EXTCtrl_LEDPort0GreenOn_MSB 0x1 +#define QIB_7322_EXTCtrl_LEDPort0GreenOn_RMASK 0x1 +#define QIB_7322_EXTCtrl_LEDPort0YellowOn_LSB 0x0 +#define QIB_7322_EXTCtrl_LEDPort0YellowOn_MSB 0x0 +#define QIB_7322_EXTCtrl_LEDPort0YellowOn_RMASK 0x1 + +#define QIB_7322_GPIOOut_OFFS 0xE0 +#define QIB_7322_GPIOOut_DEF 0x0000000000000000 + +#define QIB_7322_GPIOMask_OFFS 0xE8 +#define QIB_7322_GPIOMask_DEF 0x0000000000000000 + +#define QIB_7322_GPIOStatus_OFFS 0xF0 +#define QIB_7322_GPIOStatus_DEF 0x0000000000000000 + +#define QIB_7322_GPIOClear_OFFS 0xF8 +#define QIB_7322_GPIOClear_DEF 0x0000000000000000 + +#define QIB_7322_RcvCtrl_OFFS 0x100 +#define QIB_7322_RcvCtrl_DEF 0x0000000000000000 +#define QIB_7322_RcvCtrl_TidReDirect_LSB 0x30 +#define QIB_7322_RcvCtrl_TidReDirect_MSB 0x3F +#define QIB_7322_RcvCtrl_TidReDirect_RMASK 0xFFFF +#define QIB_7322_RcvCtrl_TailUpd_LSB 0x2F +#define QIB_7322_RcvCtrl_TailUpd_MSB 0x2F +#define QIB_7322_RcvCtrl_TailUpd_RMASK 0x1 +#define QIB_7322_RcvCtrl_XrcTypeCode_LSB 0x2C +#define QIB_7322_RcvCtrl_XrcTypeCode_MSB 0x2E +#define QIB_7322_RcvCtrl_XrcTypeCode_RMASK 0x7 +#define QIB_7322_RcvCtrl_TidFlowEnable_LSB 0x2B +#define QIB_7322_RcvCtrl_TidFlowEnable_MSB 0x2B +#define QIB_7322_RcvCtrl_TidFlowEnable_RMASK 0x1 +#define QIB_7322_RcvCtrl_ContextCfg_LSB 0x29 +#define QIB_7322_RcvCtrl_ContextCfg_MSB 0x2A +#define QIB_7322_RcvCtrl_ContextCfg_RMASK 0x3 +#define QIB_7322_RcvCtrl_IntrAvail_LSB 0x14 +#define QIB_7322_RcvCtrl_IntrAvail_MSB 0x25 +#define QIB_7322_RcvCtrl_IntrAvail_RMASK 0x3FFFF +#define QIB_7322_RcvCtrl_dontDropRHQFull_LSB 0x0 +#define QIB_7322_RcvCtrl_dontDropRHQFull_MSB 0x11 +#define QIB_7322_RcvCtrl_dontDropRHQFull_RMASK 0x3FFFF + +#define QIB_7322_RcvHdrSize_OFFS 0x110 +#define QIB_7322_RcvHdrSize_DEF 0x0000000000000000 + +#define QIB_7322_RcvHdrCnt_OFFS 0x118 +#define QIB_7322_RcvHdrCnt_DEF 0x0000000000000000 + +#define QIB_7322_RcvHdrEntSize_OFFS 0x120 +#define QIB_7322_RcvHdrEntSize_DEF 0x0000000000000000 + +#define QIB_7322_RcvTIDBase_OFFS 0x128 +#define QIB_7322_RcvTIDBase_DEF 0x0000000000050000 + +#define QIB_7322_RcvTIDCnt_OFFS 0x130 +#define QIB_7322_RcvTIDCnt_DEF 0x0000000000000200 + +#define QIB_7322_RcvEgrBase_OFFS 0x138 +#define QIB_7322_RcvEgrBase_DEF 0x0000000000014000 + +#define QIB_7322_RcvEgrCnt_OFFS 0x140 +#define QIB_7322_RcvEgrCnt_DEF 0x0000000000001000 + +#define QIB_7322_RcvBufBase_OFFS 0x148 +#define QIB_7322_RcvBufBase_DEF 0x0000000000080000 + +#define QIB_7322_RcvBufSize_OFFS 0x150 +#define QIB_7322_RcvBufSize_DEF 0x0000000000005000 + +#define QIB_7322_RxIntMemBase_OFFS 0x158 +#define QIB_7322_RxIntMemBase_DEF 0x0000000000077000 + +#define QIB_7322_RxIntMemSize_OFFS 0x160 +#define QIB_7322_RxIntMemSize_DEF 0x0000000000007000 + +#define QIB_7322_feature_mask_OFFS 0x190 +#define QIB_7322_feature_mask_DEF 0x00000000000000XX + +#define QIB_7322_active_feature_mask_OFFS 0x198 +#define QIB_7322_active_feature_mask_DEF 0x00000000000000XX +#define QIB_7322_active_feature_mask_Port1_QDR_Enabled_LSB 0x5 +#define QIB_7322_active_feature_mask_Port1_QDR_Enabled_MSB 0x5 +#define QIB_7322_active_feature_mask_Port1_QDR_Enabled_RMASK 0x1 +#define QIB_7322_active_feature_mask_Port1_DDR_Enabled_LSB 0x4 +#define QIB_7322_active_feature_mask_Port1_DDR_Enabled_MSB 0x4 +#define QIB_7322_active_feature_mask_Port1_DDR_Enabled_RMASK 0x1 +#define QIB_7322_active_feature_mask_Port1_SDR_Enabled_LSB 0x3 +#define QIB_7322_active_feature_mask_Port1_SDR_Enabled_MSB 0x3 +#define QIB_7322_active_feature_mask_Port1_SDR_Enabled_RMASK 0x1 +#define QIB_7322_active_feature_mask_Port0_QDR_Enabled_LSB 0x2 +#define QIB_7322_active_feature_mask_Port0_QDR_Enabled_MSB 0x2 +#define QIB_7322_active_feature_mask_Port0_QDR_Enabled_RMASK 0x1 +#define QIB_7322_active_feature_mask_Port0_DDR_Enabled_LSB 0x1 +#define QIB_7322_active_feature_mask_Port0_DDR_Enabled_MSB 0x1 +#define QIB_7322_active_feature_mask_Port0_DDR_Enabled_RMASK 0x1 +#define QIB_7322_active_feature_mask_Port0_SDR_Enabled_LSB 0x0 +#define QIB_7322_active_feature_mask_Port0_SDR_Enabled_MSB 0x0 +#define QIB_7322_active_feature_mask_Port0_SDR_Enabled_RMASK 0x1 + +#define QIB_7322_SendCtrl_OFFS 0x1C0 +#define QIB_7322_SendCtrl_DEF 0x0000000000000000 +#define QIB_7322_SendCtrl_Disarm_LSB 0x1F +#define QIB_7322_SendCtrl_Disarm_MSB 0x1F +#define QIB_7322_SendCtrl_Disarm_RMASK 0x1 +#define QIB_7322_SendCtrl_SendBufAvailPad64Byte_LSB 0x1D +#define QIB_7322_SendCtrl_SendBufAvailPad64Byte_MSB 0x1D +#define QIB_7322_SendCtrl_SendBufAvailPad64Byte_RMASK 0x1 +#define QIB_7322_SendCtrl_AvailUpdThld_LSB 0x18 +#define QIB_7322_SendCtrl_AvailUpdThld_MSB 0x1C +#define QIB_7322_SendCtrl_AvailUpdThld_RMASK 0x1F +#define QIB_7322_SendCtrl_DisarmSendBuf_LSB 0x10 +#define QIB_7322_SendCtrl_DisarmSendBuf_MSB 0x17 +#define QIB_7322_SendCtrl_DisarmSendBuf_RMASK 0xFF +#define QIB_7322_SendCtrl_SpecialTriggerEn_LSB 0x4 +#define QIB_7322_SendCtrl_SpecialTriggerEn_MSB 0x4 +#define QIB_7322_SendCtrl_SpecialTriggerEn_RMASK 0x1 +#define QIB_7322_SendCtrl_SendBufAvailUpd_LSB 0x2 +#define QIB_7322_SendCtrl_SendBufAvailUpd_MSB 0x2 +#define QIB_7322_SendCtrl_SendBufAvailUpd_RMASK 0x1 +#define QIB_7322_SendCtrl_SendIntBufAvail_LSB 0x1 +#define QIB_7322_SendCtrl_SendIntBufAvail_MSB 0x1 +#define QIB_7322_SendCtrl_SendIntBufAvail_RMASK 0x1 + +#define QIB_7322_SendBufBase_OFFS 0x1C8 +#define QIB_7322_SendBufBase_DEF 0x0018000000100000 +#define QIB_7322_SendBufBase_BaseAddr_LargePIO_LSB 0x20 +#define QIB_7322_SendBufBase_BaseAddr_LargePIO_MSB 0x34 +#define QIB_7322_SendBufBase_BaseAddr_LargePIO_RMASK 0x1FFFFF +#define QIB_7322_SendBufBase_BaseAddr_SmallPIO_LSB 0x0 +#define QIB_7322_SendBufBase_BaseAddr_SmallPIO_MSB 0x14 +#define QIB_7322_SendBufBase_BaseAddr_SmallPIO_RMASK 0x1FFFFF + +#define QIB_7322_SendBufSize_OFFS 0x1D0 +#define QIB_7322_SendBufSize_DEF 0x0000108000000880 +#define QIB_7322_SendBufSize_Size_LargePIO_LSB 0x20 +#define QIB_7322_SendBufSize_Size_LargePIO_MSB 0x2C +#define QIB_7322_SendBufSize_Size_LargePIO_RMASK 0x1FFF +#define QIB_7322_SendBufSize_Size_SmallPIO_LSB 0x0 +#define QIB_7322_SendBufSize_Size_SmallPIO_MSB 0xB +#define QIB_7322_SendBufSize_Size_SmallPIO_RMASK 0xFFF + +#define QIB_7322_SendBufCnt_OFFS 0x1D8 +#define QIB_7322_SendBufCnt_DEF 0x0000002000000080 +#define QIB_7322_SendBufCnt_Num_LargeBuffers_LSB 0x20 +#define QIB_7322_SendBufCnt_Num_LargeBuffers_MSB 0x25 +#define QIB_7322_SendBufCnt_Num_LargeBuffers_RMASK 0x3F +#define QIB_7322_SendBufCnt_Num_SmallBuffers_LSB 0x0 +#define QIB_7322_SendBufCnt_Num_SmallBuffers_MSB 0x8 +#define QIB_7322_SendBufCnt_Num_SmallBuffers_RMASK 0x1FF + +#define QIB_7322_SendBufAvailAddr_OFFS 0x1E0 +#define QIB_7322_SendBufAvailAddr_DEF 0x0000000000000000 +#define QIB_7322_SendBufAvailAddr_SendBufAvailAddr_LSB 0x6 +#define QIB_7322_SendBufAvailAddr_SendBufAvailAddr_MSB 0x27 +#define QIB_7322_SendBufAvailAddr_SendBufAvailAddr_RMASK 0x3FFFFFFFF + +#define QIB_7322_SendBufErr0_OFFS 0x240 +#define QIB_7322_SendBufErr0_DEF 0x0000000000000000 +#define QIB_7322_SendBufErr0_SendBufErr_63_0_LSB 0x0 +#define QIB_7322_SendBufErr0_SendBufErr_63_0_MSB 0x3F +#define QIB_7322_SendBufErr0_SendBufErr_63_0_RMASK 0x0 + +#define QIB_7322_AvailUpdCount_OFFS 0x268 +#define QIB_7322_AvailUpdCount_DEF 0x0000000000000000 +#define QIB_7322_AvailUpdCount_AvailUpdCount_LSB 0x0 +#define QIB_7322_AvailUpdCount_AvailUpdCount_MSB 0x4 +#define QIB_7322_AvailUpdCount_AvailUpdCount_RMASK 0x1F + +#define QIB_7322_RcvHdrAddr0_OFFS 0x280 +#define QIB_7322_RcvHdrAddr0_DEF 0x0000000000000000 +#define QIB_7322_RcvHdrAddr0_RcvHdrAddr_LSB 0x2 +#define QIB_7322_RcvHdrAddr0_RcvHdrAddr_MSB 0x27 +#define QIB_7322_RcvHdrAddr0_RcvHdrAddr_RMASK 0x3FFFFFFFFF + +#define QIB_7322_RcvHdrTailAddr0_OFFS 0x340 +#define QIB_7322_RcvHdrTailAddr0_DEF 0x0000000000000000 +#define QIB_7322_RcvHdrTailAddr0_RcvHdrTailAddr_LSB 0x2 +#define QIB_7322_RcvHdrTailAddr0_RcvHdrTailAddr_MSB 0x27 +#define QIB_7322_RcvHdrTailAddr0_RcvHdrTailAddr_RMASK 0x3FFFFFFFFF + +#define QIB_7322_ahb_access_ctrl_OFFS 0x460 +#define QIB_7322_ahb_access_ctrl_DEF 0x0000000000000000 +#define QIB_7322_ahb_access_ctrl_sw_sel_ahb_trgt_LSB 0x1 +#define QIB_7322_ahb_access_ctrl_sw_sel_ahb_trgt_MSB 0x2 +#define QIB_7322_ahb_access_ctrl_sw_sel_ahb_trgt_RMASK 0x3 +#define QIB_7322_ahb_access_ctrl_sw_ahb_sel_LSB 0x0 +#define QIB_7322_ahb_access_ctrl_sw_ahb_sel_MSB 0x0 +#define QIB_7322_ahb_access_ctrl_sw_ahb_sel_RMASK 0x1 + +#define QIB_7322_ahb_transaction_reg_OFFS 0x468 +#define QIB_7322_ahb_transaction_reg_DEF 0x0000000080000000 +#define QIB_7322_ahb_transaction_reg_ahb_data_LSB 0x20 +#define QIB_7322_ahb_transaction_reg_ahb_data_MSB 0x3F +#define QIB_7322_ahb_transaction_reg_ahb_data_RMASK 0xFFFFFFFF +#define QIB_7322_ahb_transaction_reg_ahb_rdy_LSB 0x1F +#define QIB_7322_ahb_transaction_reg_ahb_rdy_MSB 0x1F +#define QIB_7322_ahb_transaction_reg_ahb_rdy_RMASK 0x1 +#define QIB_7322_ahb_transaction_reg_ahb_req_err_LSB 0x1E +#define QIB_7322_ahb_transaction_reg_ahb_req_err_MSB 0x1E +#define QIB_7322_ahb_transaction_reg_ahb_req_err_RMASK 0x1 +#define QIB_7322_ahb_transaction_reg_write_not_read_LSB 0x1B +#define QIB_7322_ahb_transaction_reg_write_not_read_MSB 0x1B +#define QIB_7322_ahb_transaction_reg_write_not_read_RMASK 0x1 +#define QIB_7322_ahb_transaction_reg_ahb_address_LSB 0x10 +#define QIB_7322_ahb_transaction_reg_ahb_address_MSB 0x1A +#define QIB_7322_ahb_transaction_reg_ahb_address_RMASK 0x7FF + +#define QIB_7322_SPC_JTAG_ACCESS_REG_OFFS 0x470 +#define QIB_7322_SPC_JTAG_ACCESS_REG_DEF 0x0000000000000001 +#define QIB_7322_SPC_JTAG_ACCESS_REG_SPC_JTAG_ACCESS_EN_LSB 0xA +#define QIB_7322_SPC_JTAG_ACCESS_REG_SPC_JTAG_ACCESS_EN_MSB 0xA +#define QIB_7322_SPC_JTAG_ACCESS_REG_SPC_JTAG_ACCESS_EN_RMASK 0x1 +#define QIB_7322_SPC_JTAG_ACCESS_REG_bist_en_LSB 0x5 +#define QIB_7322_SPC_JTAG_ACCESS_REG_bist_en_MSB 0x9 +#define QIB_7322_SPC_JTAG_ACCESS_REG_bist_en_RMASK 0x1F +#define QIB_7322_SPC_JTAG_ACCESS_REG_opcode_LSB 0x3 +#define QIB_7322_SPC_JTAG_ACCESS_REG_opcode_MSB 0x4 +#define QIB_7322_SPC_JTAG_ACCESS_REG_opcode_RMASK 0x3 +#define QIB_7322_SPC_JTAG_ACCESS_REG_tdi_LSB 0x2 +#define QIB_7322_SPC_JTAG_ACCESS_REG_tdi_MSB 0x2 +#define QIB_7322_SPC_JTAG_ACCESS_REG_tdi_RMASK 0x1 +#define QIB_7322_SPC_JTAG_ACCESS_REG_tdo_LSB 0x1 +#define QIB_7322_SPC_JTAG_ACCESS_REG_tdo_MSB 0x1 +#define QIB_7322_SPC_JTAG_ACCESS_REG_tdo_RMASK 0x1 +#define QIB_7322_SPC_JTAG_ACCESS_REG_rdy_LSB 0x0 +#define QIB_7322_SPC_JTAG_ACCESS_REG_rdy_MSB 0x0 +#define QIB_7322_SPC_JTAG_ACCESS_REG_rdy_RMASK 0x1 + +#define QIB_7322_SendCheckMask0_OFFS 0x4C0 +#define QIB_7322_SendCheckMask0_DEF 0x0000000000000000 +#define QIB_7322_SendCheckMask0_SendCheckMask_63_32_LSB 0x0 +#define QIB_7322_SendCheckMask0_SendCheckMask_63_32_MSB 0x3F +#define QIB_7322_SendCheckMask0_SendCheckMask_63_32_RMASK 0x0 + +#define QIB_7322_SendGRHCheckMask0_OFFS 0x4E0 +#define QIB_7322_SendGRHCheckMask0_DEF 0x0000000000000000 +#define QIB_7322_SendGRHCheckMask0_SendGRHCheckMask_63_32_LSB 0x0 +#define QIB_7322_SendGRHCheckMask0_SendGRHCheckMask_63_32_MSB 0x3F +#define QIB_7322_SendGRHCheckMask0_SendGRHCheckMask_63_32_RMASK 0x0 + +#define QIB_7322_SendIBPacketMask0_OFFS 0x500 +#define QIB_7322_SendIBPacketMask0_DEF 0x0000000000000000 +#define QIB_7322_SendIBPacketMask0_SendIBPacketMask_63_32_LSB 0x0 +#define QIB_7322_SendIBPacketMask0_SendIBPacketMask_63_32_MSB 0x3F +#define QIB_7322_SendIBPacketMask0_SendIBPacketMask_63_32_RMASK 0x0 + +#define QIB_7322_IntRedirect0_OFFS 0x540 +#define QIB_7322_IntRedirect0_DEF 0x0000000000000000 +#define QIB_7322_IntRedirect0_vec11_LSB 0x37 +#define QIB_7322_IntRedirect0_vec11_MSB 0x3B +#define QIB_7322_IntRedirect0_vec11_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec10_LSB 0x32 +#define QIB_7322_IntRedirect0_vec10_MSB 0x36 +#define QIB_7322_IntRedirect0_vec10_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec9_LSB 0x2D +#define QIB_7322_IntRedirect0_vec9_MSB 0x31 +#define QIB_7322_IntRedirect0_vec9_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec8_LSB 0x28 +#define QIB_7322_IntRedirect0_vec8_MSB 0x2C +#define QIB_7322_IntRedirect0_vec8_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec7_LSB 0x23 +#define QIB_7322_IntRedirect0_vec7_MSB 0x27 +#define QIB_7322_IntRedirect0_vec7_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec6_LSB 0x1E +#define QIB_7322_IntRedirect0_vec6_MSB 0x22 +#define QIB_7322_IntRedirect0_vec6_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec5_LSB 0x19 +#define QIB_7322_IntRedirect0_vec5_MSB 0x1D +#define QIB_7322_IntRedirect0_vec5_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec4_LSB 0x14 +#define QIB_7322_IntRedirect0_vec4_MSB 0x18 +#define QIB_7322_IntRedirect0_vec4_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec3_LSB 0xF +#define QIB_7322_IntRedirect0_vec3_MSB 0x13 +#define QIB_7322_IntRedirect0_vec3_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec2_LSB 0xA +#define QIB_7322_IntRedirect0_vec2_MSB 0xE +#define QIB_7322_IntRedirect0_vec2_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec1_LSB 0x5 +#define QIB_7322_IntRedirect0_vec1_MSB 0x9 +#define QIB_7322_IntRedirect0_vec1_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec0_LSB 0x0 +#define QIB_7322_IntRedirect0_vec0_MSB 0x4 +#define QIB_7322_IntRedirect0_vec0_RMASK 0x1F + +#define QIB_7322_Int_Granted_OFFS 0x570 +#define QIB_7322_Int_Granted_DEF 0x0000000000000000 + +#define QIB_7322_vec_clr_without_int_OFFS 0x578 +#define QIB_7322_vec_clr_without_int_DEF 0x0000000000000000 + +#define QIB_7322_DCACtrlA_OFFS 0x580 +#define QIB_7322_DCACtrlA_DEF 0x0000000000000000 +#define QIB_7322_DCACtrlA_SendDMAHead1DCAEnable_LSB 0x4 +#define QIB_7322_DCACtrlA_SendDMAHead1DCAEnable_MSB 0x4 +#define QIB_7322_DCACtrlA_SendDMAHead1DCAEnable_RMASK 0x1 +#define QIB_7322_DCACtrlA_SendDMAHead0DCAEnable_LSB 0x3 +#define QIB_7322_DCACtrlA_SendDMAHead0DCAEnable_MSB 0x3 +#define QIB_7322_DCACtrlA_SendDMAHead0DCAEnable_RMASK 0x1 +#define QIB_7322_DCACtrlA_RcvTailUpdDCAEnable_LSB 0x2 +#define QIB_7322_DCACtrlA_RcvTailUpdDCAEnable_MSB 0x2 +#define QIB_7322_DCACtrlA_RcvTailUpdDCAEnable_RMASK 0x1 +#define QIB_7322_DCACtrlA_EagerDCAEnable_LSB 0x1 +#define QIB_7322_DCACtrlA_EagerDCAEnable_MSB 0x1 +#define QIB_7322_DCACtrlA_EagerDCAEnable_RMASK 0x1 +#define QIB_7322_DCACtrlA_RcvHdrqDCAEnable_LSB 0x0 +#define QIB_7322_DCACtrlA_RcvHdrqDCAEnable_MSB 0x0 +#define QIB_7322_DCACtrlA_RcvHdrqDCAEnable_RMASK 0x1 + +#define QIB_7322_DCACtrlB_OFFS 0x588 +#define QIB_7322_DCACtrlB_DEF 0x0000000000000000 +#define QIB_7322_DCACtrlB_RcvHdrq3DCAXfrCnt_LSB 0x36 +#define QIB_7322_DCACtrlB_RcvHdrq3DCAXfrCnt_MSB 0x3B +#define QIB_7322_DCACtrlB_RcvHdrq3DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlB_RcvHdrq3DCAOPH_LSB 0x2E +#define QIB_7322_DCACtrlB_RcvHdrq3DCAOPH_MSB 0x35 +#define QIB_7322_DCACtrlB_RcvHdrq3DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlB_RcvHdrq2DCAXfrCnt_LSB 0x28 +#define QIB_7322_DCACtrlB_RcvHdrq2DCAXfrCnt_MSB 0x2D +#define QIB_7322_DCACtrlB_RcvHdrq2DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlB_RcvHdrq2DCAOPH_LSB 0x20 +#define QIB_7322_DCACtrlB_RcvHdrq2DCAOPH_MSB 0x27 +#define QIB_7322_DCACtrlB_RcvHdrq2DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlB_RcvHdrq1DCAXfrCnt_LSB 0x16 +#define QIB_7322_DCACtrlB_RcvHdrq1DCAXfrCnt_MSB 0x1B +#define QIB_7322_DCACtrlB_RcvHdrq1DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlB_RcvHdrq1DCAOPH_LSB 0xE +#define QIB_7322_DCACtrlB_RcvHdrq1DCAOPH_MSB 0x15 +#define QIB_7322_DCACtrlB_RcvHdrq1DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlB_RcvHdrq0DCAXfrCnt_LSB 0x8 +#define QIB_7322_DCACtrlB_RcvHdrq0DCAXfrCnt_MSB 0xD +#define QIB_7322_DCACtrlB_RcvHdrq0DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlB_RcvHdrq0DCAOPH_LSB 0x0 +#define QIB_7322_DCACtrlB_RcvHdrq0DCAOPH_MSB 0x7 +#define QIB_7322_DCACtrlB_RcvHdrq0DCAOPH_RMASK 0xFF + +#define QIB_7322_DCACtrlC_OFFS 0x590 +#define QIB_7322_DCACtrlC_DEF 0x0000000000000000 +#define QIB_7322_DCACtrlC_RcvHdrq7DCAXfrCnt_LSB 0x36 +#define QIB_7322_DCACtrlC_RcvHdrq7DCAXfrCnt_MSB 0x3B +#define QIB_7322_DCACtrlC_RcvHdrq7DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlC_RcvHdrq7DCAOPH_LSB 0x2E +#define QIB_7322_DCACtrlC_RcvHdrq7DCAOPH_MSB 0x35 +#define QIB_7322_DCACtrlC_RcvHdrq7DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlC_RcvHdrq6DCAXfrCnt_LSB 0x28 +#define QIB_7322_DCACtrlC_RcvHdrq6DCAXfrCnt_MSB 0x2D +#define QIB_7322_DCACtrlC_RcvHdrq6DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlC_RcvHdrq6DCAOPH_LSB 0x20 +#define QIB_7322_DCACtrlC_RcvHdrq6DCAOPH_MSB 0x27 +#define QIB_7322_DCACtrlC_RcvHdrq6DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlC_RcvHdrq5DCAXfrCnt_LSB 0x16 +#define QIB_7322_DCACtrlC_RcvHdrq5DCAXfrCnt_MSB 0x1B +#define QIB_7322_DCACtrlC_RcvHdrq5DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlC_RcvHdrq5DCAOPH_LSB 0xE +#define QIB_7322_DCACtrlC_RcvHdrq5DCAOPH_MSB 0x15 +#define QIB_7322_DCACtrlC_RcvHdrq5DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlC_RcvHdrq4DCAXfrCnt_LSB 0x8 +#define QIB_7322_DCACtrlC_RcvHdrq4DCAXfrCnt_MSB 0xD +#define QIB_7322_DCACtrlC_RcvHdrq4DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlC_RcvHdrq4DCAOPH_LSB 0x0 +#define QIB_7322_DCACtrlC_RcvHdrq4DCAOPH_MSB 0x7 +#define QIB_7322_DCACtrlC_RcvHdrq4DCAOPH_RMASK 0xFF + +#define QIB_7322_DCACtrlD_OFFS 0x598 +#define QIB_7322_DCACtrlD_DEF 0x0000000000000000 +#define QIB_7322_DCACtrlD_RcvHdrq11DCAXfrCnt_LSB 0x36 +#define QIB_7322_DCACtrlD_RcvHdrq11DCAXfrCnt_MSB 0x3B +#define QIB_7322_DCACtrlD_RcvHdrq11DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlD_RcvHdrq11DCAOPH_LSB 0x2E +#define QIB_7322_DCACtrlD_RcvHdrq11DCAOPH_MSB 0x35 +#define QIB_7322_DCACtrlD_RcvHdrq11DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlD_RcvHdrq10DCAXfrCnt_LSB 0x28 +#define QIB_7322_DCACtrlD_RcvHdrq10DCAXfrCnt_MSB 0x2D +#define QIB_7322_DCACtrlD_RcvHdrq10DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlD_RcvHdrq10DCAOPH_LSB 0x20 +#define QIB_7322_DCACtrlD_RcvHdrq10DCAOPH_MSB 0x27 +#define QIB_7322_DCACtrlD_RcvHdrq10DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlD_RcvHdrq9DCAXfrCnt_LSB 0x16 +#define QIB_7322_DCACtrlD_RcvHdrq9DCAXfrCnt_MSB 0x1B +#define QIB_7322_DCACtrlD_RcvHdrq9DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlD_RcvHdrq9DCAOPH_LSB 0xE +#define QIB_7322_DCACtrlD_RcvHdrq9DCAOPH_MSB 0x15 +#define QIB_7322_DCACtrlD_RcvHdrq9DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlD_RcvHdrq8DCAXfrCnt_LSB 0x8 +#define QIB_7322_DCACtrlD_RcvHdrq8DCAXfrCnt_MSB 0xD +#define QIB_7322_DCACtrlD_RcvHdrq8DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlD_RcvHdrq8DCAOPH_LSB 0x0 +#define QIB_7322_DCACtrlD_RcvHdrq8DCAOPH_MSB 0x7 +#define QIB_7322_DCACtrlD_RcvHdrq8DCAOPH_RMASK 0xFF + +#define QIB_7322_DCACtrlE_OFFS 0x5A0 +#define QIB_7322_DCACtrlE_DEF 0x0000000000000000 +#define QIB_7322_DCACtrlE_RcvHdrq15DCAXfrCnt_LSB 0x36 +#define QIB_7322_DCACtrlE_RcvHdrq15DCAXfrCnt_MSB 0x3B +#define QIB_7322_DCACtrlE_RcvHdrq15DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlE_RcvHdrq15DCAOPH_LSB 0x2E +#define QIB_7322_DCACtrlE_RcvHdrq15DCAOPH_MSB 0x35 +#define QIB_7322_DCACtrlE_RcvHdrq15DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlE_RcvHdrq14DCAXfrCnt_LSB 0x28 +#define QIB_7322_DCACtrlE_RcvHdrq14DCAXfrCnt_MSB 0x2D +#define QIB_7322_DCACtrlE_RcvHdrq14DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlE_RcvHdrq14DCAOPH_LSB 0x20 +#define QIB_7322_DCACtrlE_RcvHdrq14DCAOPH_MSB 0x27 +#define QIB_7322_DCACtrlE_RcvHdrq14DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlE_RcvHdrq13DCAXfrCnt_LSB 0x16 +#define QIB_7322_DCACtrlE_RcvHdrq13DCAXfrCnt_MSB 0x1B +#define QIB_7322_DCACtrlE_RcvHdrq13DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlE_RcvHdrq13DCAOPH_LSB 0xE +#define QIB_7322_DCACtrlE_RcvHdrq13DCAOPH_MSB 0x15 +#define QIB_7322_DCACtrlE_RcvHdrq13DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlE_RcvHdrq12DCAXfrCnt_LSB 0x8 +#define QIB_7322_DCACtrlE_RcvHdrq12DCAXfrCnt_MSB 0xD +#define QIB_7322_DCACtrlE_RcvHdrq12DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlE_RcvHdrq12DCAOPH_LSB 0x0 +#define QIB_7322_DCACtrlE_RcvHdrq12DCAOPH_MSB 0x7 +#define QIB_7322_DCACtrlE_RcvHdrq12DCAOPH_RMASK 0xFF + +#define QIB_7322_DCACtrlF_OFFS 0x5A8 +#define QIB_7322_DCACtrlF_DEF 0x0000000000000000 +#define QIB_7322_DCACtrlF_SendDma1DCAOPH_LSB 0x28 +#define QIB_7322_DCACtrlF_SendDma1DCAOPH_MSB 0x2F +#define QIB_7322_DCACtrlF_SendDma1DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlF_SendDma0DCAOPH_LSB 0x20 +#define QIB_7322_DCACtrlF_SendDma0DCAOPH_MSB 0x27 +#define QIB_7322_DCACtrlF_SendDma0DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlF_RcvHdrq17DCAXfrCnt_LSB 0x16 +#define QIB_7322_DCACtrlF_RcvHdrq17DCAXfrCnt_MSB 0x1B +#define QIB_7322_DCACtrlF_RcvHdrq17DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlF_RcvHdrq17DCAOPH_LSB 0xE +#define QIB_7322_DCACtrlF_RcvHdrq17DCAOPH_MSB 0x15 +#define QIB_7322_DCACtrlF_RcvHdrq17DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlF_RcvHdrq16DCAXfrCnt_LSB 0x8 +#define QIB_7322_DCACtrlF_RcvHdrq16DCAXfrCnt_MSB 0xD +#define QIB_7322_DCACtrlF_RcvHdrq16DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlF_RcvHdrq16DCAOPH_LSB 0x0 +#define QIB_7322_DCACtrlF_RcvHdrq16DCAOPH_MSB 0x7 +#define QIB_7322_DCACtrlF_RcvHdrq16DCAOPH_RMASK 0xFF + +#define QIB_7322_RcvAvailTimeOut0_OFFS 0xC00 +#define QIB_7322_RcvAvailTimeOut0_DEF 0x0000000000000000 +#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOCount_LSB 0x10 +#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOCount_MSB 0x1F +#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOCount_RMASK 0xFFFF +#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOReload_LSB 0x0 +#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOReload_MSB 0xF +#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOReload_RMASK 0xFFFF + +#define QIB_7322_CntrRegBase_0_OFFS 0x1028 +#define QIB_7322_CntrRegBase_0_DEF 0x0000000000012000 + +#define QIB_7322_ErrMask_0_OFFS 0x1080 +#define QIB_7322_ErrMask_0_DEF 0x0000000000000000 +#define QIB_7322_ErrMask_0_IBStatusChangedMask_LSB 0x3A +#define QIB_7322_ErrMask_0_IBStatusChangedMask_MSB 0x3A +#define QIB_7322_ErrMask_0_IBStatusChangedMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SHeadersErrMask_LSB 0x39 +#define QIB_7322_ErrMask_0_SHeadersErrMask_MSB 0x39 +#define QIB_7322_ErrMask_0_SHeadersErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_VL15BufMisuseErrMask_LSB 0x36 +#define QIB_7322_ErrMask_0_VL15BufMisuseErrMask_MSB 0x36 +#define QIB_7322_ErrMask_0_VL15BufMisuseErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaHaltErrMask_LSB 0x31 +#define QIB_7322_ErrMask_0_SDmaHaltErrMask_MSB 0x31 +#define QIB_7322_ErrMask_0_SDmaHaltErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaDescAddrMisalignErrMask_LSB 0x30 +#define QIB_7322_ErrMask_0_SDmaDescAddrMisalignErrMask_MSB 0x30 +#define QIB_7322_ErrMask_0_SDmaDescAddrMisalignErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaUnexpDataErrMask_LSB 0x2F +#define QIB_7322_ErrMask_0_SDmaUnexpDataErrMask_MSB 0x2F +#define QIB_7322_ErrMask_0_SDmaUnexpDataErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaMissingDwErrMask_LSB 0x2E +#define QIB_7322_ErrMask_0_SDmaMissingDwErrMask_MSB 0x2E +#define QIB_7322_ErrMask_0_SDmaMissingDwErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaDwEnErrMask_LSB 0x2D +#define QIB_7322_ErrMask_0_SDmaDwEnErrMask_MSB 0x2D +#define QIB_7322_ErrMask_0_SDmaDwEnErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaRpyTagErrMask_LSB 0x2C +#define QIB_7322_ErrMask_0_SDmaRpyTagErrMask_MSB 0x2C +#define QIB_7322_ErrMask_0_SDmaRpyTagErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDma1stDescErrMask_LSB 0x2B +#define QIB_7322_ErrMask_0_SDma1stDescErrMask_MSB 0x2B +#define QIB_7322_ErrMask_0_SDma1stDescErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaBaseErrMask_LSB 0x2A +#define QIB_7322_ErrMask_0_SDmaBaseErrMask_MSB 0x2A +#define QIB_7322_ErrMask_0_SDmaBaseErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaTailOutOfBoundErrMask_LSB 0x29 +#define QIB_7322_ErrMask_0_SDmaTailOutOfBoundErrMask_MSB 0x29 +#define QIB_7322_ErrMask_0_SDmaTailOutOfBoundErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaOutOfBoundErrMask_LSB 0x28 +#define QIB_7322_ErrMask_0_SDmaOutOfBoundErrMask_MSB 0x28 +#define QIB_7322_ErrMask_0_SDmaOutOfBoundErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaGenMismatchErrMask_LSB 0x27 +#define QIB_7322_ErrMask_0_SDmaGenMismatchErrMask_MSB 0x27 +#define QIB_7322_ErrMask_0_SDmaGenMismatchErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendBufMisuseErrMask_LSB 0x26 +#define QIB_7322_ErrMask_0_SendBufMisuseErrMask_MSB 0x26 +#define QIB_7322_ErrMask_0_SendBufMisuseErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendUnsupportedVLErrMask_LSB 0x25 +#define QIB_7322_ErrMask_0_SendUnsupportedVLErrMask_MSB 0x25 +#define QIB_7322_ErrMask_0_SendUnsupportedVLErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendUnexpectedPktNumErrMask_LSB 0x24 +#define QIB_7322_ErrMask_0_SendUnexpectedPktNumErrMask_MSB 0x24 +#define QIB_7322_ErrMask_0_SendUnexpectedPktNumErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendDroppedDataPktErrMask_LSB 0x22 +#define QIB_7322_ErrMask_0_SendDroppedDataPktErrMask_MSB 0x22 +#define QIB_7322_ErrMask_0_SendDroppedDataPktErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendDroppedSmpPktErrMask_LSB 0x21 +#define QIB_7322_ErrMask_0_SendDroppedSmpPktErrMask_MSB 0x21 +#define QIB_7322_ErrMask_0_SendDroppedSmpPktErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendPktLenErrMask_LSB 0x20 +#define QIB_7322_ErrMask_0_SendPktLenErrMask_MSB 0x20 +#define QIB_7322_ErrMask_0_SendPktLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendUnderRunErrMask_LSB 0x1F +#define QIB_7322_ErrMask_0_SendUnderRunErrMask_MSB 0x1F +#define QIB_7322_ErrMask_0_SendUnderRunErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendMaxPktLenErrMask_LSB 0x1E +#define QIB_7322_ErrMask_0_SendMaxPktLenErrMask_MSB 0x1E +#define QIB_7322_ErrMask_0_SendMaxPktLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendMinPktLenErrMask_LSB 0x1D +#define QIB_7322_ErrMask_0_SendMinPktLenErrMask_MSB 0x1D +#define QIB_7322_ErrMask_0_SendMinPktLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvIBLostLinkErrMask_LSB 0x11 +#define QIB_7322_ErrMask_0_RcvIBLostLinkErrMask_MSB 0x11 +#define QIB_7322_ErrMask_0_RcvIBLostLinkErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvHdrErrMask_LSB 0x10 +#define QIB_7322_ErrMask_0_RcvHdrErrMask_MSB 0x10 +#define QIB_7322_ErrMask_0_RcvHdrErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvHdrLenErrMask_LSB 0xF +#define QIB_7322_ErrMask_0_RcvHdrLenErrMask_MSB 0xF +#define QIB_7322_ErrMask_0_RcvHdrLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvBadTidErrMask_LSB 0xE +#define QIB_7322_ErrMask_0_RcvBadTidErrMask_MSB 0xE +#define QIB_7322_ErrMask_0_RcvBadTidErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvBadVersionErrMask_LSB 0xB +#define QIB_7322_ErrMask_0_RcvBadVersionErrMask_MSB 0xB +#define QIB_7322_ErrMask_0_RcvBadVersionErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvIBFlowErrMask_LSB 0xA +#define QIB_7322_ErrMask_0_RcvIBFlowErrMask_MSB 0xA +#define QIB_7322_ErrMask_0_RcvIBFlowErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvEBPErrMask_LSB 0x9 +#define QIB_7322_ErrMask_0_RcvEBPErrMask_MSB 0x9 +#define QIB_7322_ErrMask_0_RcvEBPErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvUnsupportedVLErrMask_LSB 0x8 +#define QIB_7322_ErrMask_0_RcvUnsupportedVLErrMask_MSB 0x8 +#define QIB_7322_ErrMask_0_RcvUnsupportedVLErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvUnexpectedCharErrMask_LSB 0x7 +#define QIB_7322_ErrMask_0_RcvUnexpectedCharErrMask_MSB 0x7 +#define QIB_7322_ErrMask_0_RcvUnexpectedCharErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvShortPktLenErrMask_LSB 0x6 +#define QIB_7322_ErrMask_0_RcvShortPktLenErrMask_MSB 0x6 +#define QIB_7322_ErrMask_0_RcvShortPktLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvLongPktLenErrMask_LSB 0x5 +#define QIB_7322_ErrMask_0_RcvLongPktLenErrMask_MSB 0x5 +#define QIB_7322_ErrMask_0_RcvLongPktLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvMaxPktLenErrMask_LSB 0x4 +#define QIB_7322_ErrMask_0_RcvMaxPktLenErrMask_MSB 0x4 +#define QIB_7322_ErrMask_0_RcvMaxPktLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvMinPktLenErrMask_LSB 0x3 +#define QIB_7322_ErrMask_0_RcvMinPktLenErrMask_MSB 0x3 +#define QIB_7322_ErrMask_0_RcvMinPktLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvICRCErrMask_LSB 0x2 +#define QIB_7322_ErrMask_0_RcvICRCErrMask_MSB 0x2 +#define QIB_7322_ErrMask_0_RcvICRCErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvVCRCErrMask_LSB 0x1 +#define QIB_7322_ErrMask_0_RcvVCRCErrMask_MSB 0x1 +#define QIB_7322_ErrMask_0_RcvVCRCErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvFormatErrMask_LSB 0x0 +#define QIB_7322_ErrMask_0_RcvFormatErrMask_MSB 0x0 +#define QIB_7322_ErrMask_0_RcvFormatErrMask_RMASK 0x1 + +#define QIB_7322_ErrStatus_0_OFFS 0x1088 +#define QIB_7322_ErrStatus_0_DEF 0x0000000000000000 +#define QIB_7322_ErrStatus_0_IBStatusChanged_LSB 0x3A +#define QIB_7322_ErrStatus_0_IBStatusChanged_MSB 0x3A +#define QIB_7322_ErrStatus_0_IBStatusChanged_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SHeadersErr_LSB 0x39 +#define QIB_7322_ErrStatus_0_SHeadersErr_MSB 0x39 +#define QIB_7322_ErrStatus_0_SHeadersErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_VL15BufMisuseErr_LSB 0x36 +#define QIB_7322_ErrStatus_0_VL15BufMisuseErr_MSB 0x36 +#define QIB_7322_ErrStatus_0_VL15BufMisuseErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaHaltErr_LSB 0x31 +#define QIB_7322_ErrStatus_0_SDmaHaltErr_MSB 0x31 +#define QIB_7322_ErrStatus_0_SDmaHaltErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaDescAddrMisalignErr_LSB 0x30 +#define QIB_7322_ErrStatus_0_SDmaDescAddrMisalignErr_MSB 0x30 +#define QIB_7322_ErrStatus_0_SDmaDescAddrMisalignErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaUnexpDataErr_LSB 0x2F +#define QIB_7322_ErrStatus_0_SDmaUnexpDataErr_MSB 0x2F +#define QIB_7322_ErrStatus_0_SDmaUnexpDataErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaMissingDwErr_LSB 0x2E +#define QIB_7322_ErrStatus_0_SDmaMissingDwErr_MSB 0x2E +#define QIB_7322_ErrStatus_0_SDmaMissingDwErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaDwEnErr_LSB 0x2D +#define QIB_7322_ErrStatus_0_SDmaDwEnErr_MSB 0x2D +#define QIB_7322_ErrStatus_0_SDmaDwEnErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaRpyTagErr_LSB 0x2C +#define QIB_7322_ErrStatus_0_SDmaRpyTagErr_MSB 0x2C +#define QIB_7322_ErrStatus_0_SDmaRpyTagErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDma1stDescErr_LSB 0x2B +#define QIB_7322_ErrStatus_0_SDma1stDescErr_MSB 0x2B +#define QIB_7322_ErrStatus_0_SDma1stDescErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaBaseErr_LSB 0x2A +#define QIB_7322_ErrStatus_0_SDmaBaseErr_MSB 0x2A +#define QIB_7322_ErrStatus_0_SDmaBaseErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaTailOutOfBoundErr_LSB 0x29 +#define QIB_7322_ErrStatus_0_SDmaTailOutOfBoundErr_MSB 0x29 +#define QIB_7322_ErrStatus_0_SDmaTailOutOfBoundErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaOutOfBoundErr_LSB 0x28 +#define QIB_7322_ErrStatus_0_SDmaOutOfBoundErr_MSB 0x28 +#define QIB_7322_ErrStatus_0_SDmaOutOfBoundErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaGenMismatchErr_LSB 0x27 +#define QIB_7322_ErrStatus_0_SDmaGenMismatchErr_MSB 0x27 +#define QIB_7322_ErrStatus_0_SDmaGenMismatchErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendBufMisuseErr_LSB 0x26 +#define QIB_7322_ErrStatus_0_SendBufMisuseErr_MSB 0x26 +#define QIB_7322_ErrStatus_0_SendBufMisuseErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendUnsupportedVLErr_LSB 0x25 +#define QIB_7322_ErrStatus_0_SendUnsupportedVLErr_MSB 0x25 +#define QIB_7322_ErrStatus_0_SendUnsupportedVLErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendUnexpectedPktNumErr_LSB 0x24 +#define QIB_7322_ErrStatus_0_SendUnexpectedPktNumErr_MSB 0x24 +#define QIB_7322_ErrStatus_0_SendUnexpectedPktNumErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendDroppedDataPktErr_LSB 0x22 +#define QIB_7322_ErrStatus_0_SendDroppedDataPktErr_MSB 0x22 +#define QIB_7322_ErrStatus_0_SendDroppedDataPktErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendDroppedSmpPktErr_LSB 0x21 +#define QIB_7322_ErrStatus_0_SendDroppedSmpPktErr_MSB 0x21 +#define QIB_7322_ErrStatus_0_SendDroppedSmpPktErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendPktLenErr_LSB 0x20 +#define QIB_7322_ErrStatus_0_SendPktLenErr_MSB 0x20 +#define QIB_7322_ErrStatus_0_SendPktLenErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendUnderRunErr_LSB 0x1F +#define QIB_7322_ErrStatus_0_SendUnderRunErr_MSB 0x1F +#define QIB_7322_ErrStatus_0_SendUnderRunErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendMaxPktLenErr_LSB 0x1E +#define QIB_7322_ErrStatus_0_SendMaxPktLenErr_MSB 0x1E +#define QIB_7322_ErrStatus_0_SendMaxPktLenErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendMinPktLenErr_LSB 0x1D +#define QIB_7322_ErrStatus_0_SendMinPktLenErr_MSB 0x1D +#define QIB_7322_ErrStatus_0_SendMinPktLenErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvIBLostLinkErr_LSB 0x11 +#define QIB_7322_ErrStatus_0_RcvIBLostLinkErr_MSB 0x11 +#define QIB_7322_ErrStatus_0_RcvIBLostLinkErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvHdrErr_LSB 0x10 +#define QIB_7322_ErrStatus_0_RcvHdrErr_MSB 0x10 +#define QIB_7322_ErrStatus_0_RcvHdrErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvHdrLenErr_LSB 0xF +#define QIB_7322_ErrStatus_0_RcvHdrLenErr_MSB 0xF +#define QIB_7322_ErrStatus_0_RcvHdrLenErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvBadTidErr_LSB 0xE +#define QIB_7322_ErrStatus_0_RcvBadTidErr_MSB 0xE +#define QIB_7322_ErrStatus_0_RcvBadTidErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvBadVersionErr_LSB 0xB +#define QIB_7322_ErrStatus_0_RcvBadVersionErr_MSB 0xB +#define QIB_7322_ErrStatus_0_RcvBadVersionErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvIBFlowErr_LSB 0xA +#define QIB_7322_ErrStatus_0_RcvIBFlowErr_MSB 0xA +#define QIB_7322_ErrStatus_0_RcvIBFlowErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvEBPErr_LSB 0x9 +#define QIB_7322_ErrStatus_0_RcvEBPErr_MSB 0x9 +#define QIB_7322_ErrStatus_0_RcvEBPErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvUnsupportedVLErr_LSB 0x8 +#define QIB_7322_ErrStatus_0_RcvUnsupportedVLErr_MSB 0x8 +#define QIB_7322_ErrStatus_0_RcvUnsupportedVLErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvUnexpectedCharErr_LSB 0x7 +#define QIB_7322_ErrStatus_0_RcvUnexpectedCharErr_MSB 0x7 +#define QIB_7322_ErrStatus_0_RcvUnexpectedCharErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvShortPktLenErr_LSB 0x6 +#define QIB_7322_ErrStatus_0_RcvShortPktLenErr_MSB 0x6 +#define QIB_7322_ErrStatus_0_RcvShortPktLenErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvLongPktLenErr_LSB 0x5 +#define QIB_7322_ErrStatus_0_RcvLongPktLenErr_MSB 0x5 +#define QIB_7322_ErrStatus_0_RcvLongPktLenErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvMaxPktLenErr_LSB 0x4 +#define QIB_7322_ErrStatus_0_RcvMaxPktLenErr_MSB 0x4 +#define QIB_7322_ErrStatus_0_RcvMaxPktLenErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvMinPktLenErr_LSB 0x3 +#define QIB_7322_ErrStatus_0_RcvMinPktLenErr_MSB 0x3 +#define QIB_7322_ErrStatus_0_RcvMinPktLenErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvICRCErr_LSB 0x2 +#define QIB_7322_ErrStatus_0_RcvICRCErr_MSB 0x2 +#define QIB_7322_ErrStatus_0_RcvICRCErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvVCRCErr_LSB 0x1 +#define QIB_7322_ErrStatus_0_RcvVCRCErr_MSB 0x1 +#define QIB_7322_ErrStatus_0_RcvVCRCErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvFormatErr_LSB 0x0 +#define QIB_7322_ErrStatus_0_RcvFormatErr_MSB 0x0 +#define QIB_7322_ErrStatus_0_RcvFormatErr_RMASK 0x1 + +#define QIB_7322_ErrClear_0_OFFS 0x1090 +#define QIB_7322_ErrClear_0_DEF 0x0000000000000000 +#define QIB_7322_ErrClear_0_IBStatusChangedClear_LSB 0x3A +#define QIB_7322_ErrClear_0_IBStatusChangedClear_MSB 0x3A +#define QIB_7322_ErrClear_0_IBStatusChangedClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SHeadersErrClear_LSB 0x39 +#define QIB_7322_ErrClear_0_SHeadersErrClear_MSB 0x39 +#define QIB_7322_ErrClear_0_SHeadersErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_VL15BufMisuseErrClear_LSB 0x36 +#define QIB_7322_ErrClear_0_VL15BufMisuseErrClear_MSB 0x36 +#define QIB_7322_ErrClear_0_VL15BufMisuseErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaHaltErrClear_LSB 0x31 +#define QIB_7322_ErrClear_0_SDmaHaltErrClear_MSB 0x31 +#define QIB_7322_ErrClear_0_SDmaHaltErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaDescAddrMisalignErrClear_LSB 0x30 +#define QIB_7322_ErrClear_0_SDmaDescAddrMisalignErrClear_MSB 0x30 +#define QIB_7322_ErrClear_0_SDmaDescAddrMisalignErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaUnexpDataErrClear_LSB 0x2F +#define QIB_7322_ErrClear_0_SDmaUnexpDataErrClear_MSB 0x2F +#define QIB_7322_ErrClear_0_SDmaUnexpDataErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaMissingDwErrClear_LSB 0x2E +#define QIB_7322_ErrClear_0_SDmaMissingDwErrClear_MSB 0x2E +#define QIB_7322_ErrClear_0_SDmaMissingDwErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaDwEnErrClear_LSB 0x2D +#define QIB_7322_ErrClear_0_SDmaDwEnErrClear_MSB 0x2D +#define QIB_7322_ErrClear_0_SDmaDwEnErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaRpyTagErrClear_LSB 0x2C +#define QIB_7322_ErrClear_0_SDmaRpyTagErrClear_MSB 0x2C +#define QIB_7322_ErrClear_0_SDmaRpyTagErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDma1stDescErrClear_LSB 0x2B +#define QIB_7322_ErrClear_0_SDma1stDescErrClear_MSB 0x2B +#define QIB_7322_ErrClear_0_SDma1stDescErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaBaseErrClear_LSB 0x2A +#define QIB_7322_ErrClear_0_SDmaBaseErrClear_MSB 0x2A +#define QIB_7322_ErrClear_0_SDmaBaseErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaTailOutOfBoundErrClear_LSB 0x29 +#define QIB_7322_ErrClear_0_SDmaTailOutOfBoundErrClear_MSB 0x29 +#define QIB_7322_ErrClear_0_SDmaTailOutOfBoundErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaOutOfBoundErrClear_LSB 0x28 +#define QIB_7322_ErrClear_0_SDmaOutOfBoundErrClear_MSB 0x28 +#define QIB_7322_ErrClear_0_SDmaOutOfBoundErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaGenMismatchErrClear_LSB 0x27 +#define QIB_7322_ErrClear_0_SDmaGenMismatchErrClear_MSB 0x27 +#define QIB_7322_ErrClear_0_SDmaGenMismatchErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendBufMisuseErrClear_LSB 0x26 +#define QIB_7322_ErrClear_0_SendBufMisuseErrClear_MSB 0x26 +#define QIB_7322_ErrClear_0_SendBufMisuseErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendUnsupportedVLErrClear_LSB 0x25 +#define QIB_7322_ErrClear_0_SendUnsupportedVLErrClear_MSB 0x25 +#define QIB_7322_ErrClear_0_SendUnsupportedVLErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendUnexpectedPktNumErrClear_LSB 0x24 +#define QIB_7322_ErrClear_0_SendUnexpectedPktNumErrClear_MSB 0x24 +#define QIB_7322_ErrClear_0_SendUnexpectedPktNumErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendDroppedDataPktErrClear_LSB 0x22 +#define QIB_7322_ErrClear_0_SendDroppedDataPktErrClear_MSB 0x22 +#define QIB_7322_ErrClear_0_SendDroppedDataPktErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendDroppedSmpPktErrClear_LSB 0x21 +#define QIB_7322_ErrClear_0_SendDroppedSmpPktErrClear_MSB 0x21 +#define QIB_7322_ErrClear_0_SendDroppedSmpPktErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendPktLenErrClear_LSB 0x20 +#define QIB_7322_ErrClear_0_SendPktLenErrClear_MSB 0x20 +#define QIB_7322_ErrClear_0_SendPktLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendUnderRunErrClear_LSB 0x1F +#define QIB_7322_ErrClear_0_SendUnderRunErrClear_MSB 0x1F +#define QIB_7322_ErrClear_0_SendUnderRunErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendMaxPktLenErrClear_LSB 0x1E +#define QIB_7322_ErrClear_0_SendMaxPktLenErrClear_MSB 0x1E +#define QIB_7322_ErrClear_0_SendMaxPktLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendMinPktLenErrClear_LSB 0x1D +#define QIB_7322_ErrClear_0_SendMinPktLenErrClear_MSB 0x1D +#define QIB_7322_ErrClear_0_SendMinPktLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvIBLostLinkErrClear_LSB 0x11 +#define QIB_7322_ErrClear_0_RcvIBLostLinkErrClear_MSB 0x11 +#define QIB_7322_ErrClear_0_RcvIBLostLinkErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvHdrErrClear_LSB 0x10 +#define QIB_7322_ErrClear_0_RcvHdrErrClear_MSB 0x10 +#define QIB_7322_ErrClear_0_RcvHdrErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvHdrLenErrClear_LSB 0xF +#define QIB_7322_ErrClear_0_RcvHdrLenErrClear_MSB 0xF +#define QIB_7322_ErrClear_0_RcvHdrLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvBadTidErrClear_LSB 0xE +#define QIB_7322_ErrClear_0_RcvBadTidErrClear_MSB 0xE +#define QIB_7322_ErrClear_0_RcvBadTidErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvBadVersionErrClear_LSB 0xB +#define QIB_7322_ErrClear_0_RcvBadVersionErrClear_MSB 0xB +#define QIB_7322_ErrClear_0_RcvBadVersionErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvIBFlowErrClear_LSB 0xA +#define QIB_7322_ErrClear_0_RcvIBFlowErrClear_MSB 0xA +#define QIB_7322_ErrClear_0_RcvIBFlowErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvEBPErrClear_LSB 0x9 +#define QIB_7322_ErrClear_0_RcvEBPErrClear_MSB 0x9 +#define QIB_7322_ErrClear_0_RcvEBPErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvUnsupportedVLErrClear_LSB 0x8 +#define QIB_7322_ErrClear_0_RcvUnsupportedVLErrClear_MSB 0x8 +#define QIB_7322_ErrClear_0_RcvUnsupportedVLErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvUnexpectedCharErrClear_LSB 0x7 +#define QIB_7322_ErrClear_0_RcvUnexpectedCharErrClear_MSB 0x7 +#define QIB_7322_ErrClear_0_RcvUnexpectedCharErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvShortPktLenErrClear_LSB 0x6 +#define QIB_7322_ErrClear_0_RcvShortPktLenErrClear_MSB 0x6 +#define QIB_7322_ErrClear_0_RcvShortPktLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvLongPktLenErrClear_LSB 0x5 +#define QIB_7322_ErrClear_0_RcvLongPktLenErrClear_MSB 0x5 +#define QIB_7322_ErrClear_0_RcvLongPktLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvMaxPktLenErrClear_LSB 0x4 +#define QIB_7322_ErrClear_0_RcvMaxPktLenErrClear_MSB 0x4 +#define QIB_7322_ErrClear_0_RcvMaxPktLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvMinPktLenErrClear_LSB 0x3 +#define QIB_7322_ErrClear_0_RcvMinPktLenErrClear_MSB 0x3 +#define QIB_7322_ErrClear_0_RcvMinPktLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvICRCErrClear_LSB 0x2 +#define QIB_7322_ErrClear_0_RcvICRCErrClear_MSB 0x2 +#define QIB_7322_ErrClear_0_RcvICRCErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvVCRCErrClear_LSB 0x1 +#define QIB_7322_ErrClear_0_RcvVCRCErrClear_MSB 0x1 +#define QIB_7322_ErrClear_0_RcvVCRCErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvFormatErrClear_LSB 0x0 +#define QIB_7322_ErrClear_0_RcvFormatErrClear_MSB 0x0 +#define QIB_7322_ErrClear_0_RcvFormatErrClear_RMASK 0x1 + +#define QIB_7322_TXEStatus_0_OFFS 0x10B8 +#define QIB_7322_TXEStatus_0_DEF 0x0000000XC00080FF +#define QIB_7322_TXEStatus_0_TXE_IBC_Idle_LSB 0x1F +#define QIB_7322_TXEStatus_0_TXE_IBC_Idle_MSB 0x1F +#define QIB_7322_TXEStatus_0_TXE_IBC_Idle_RMASK 0x1 +#define QIB_7322_TXEStatus_0_RmFifoEmpty_LSB 0x1E +#define QIB_7322_TXEStatus_0_RmFifoEmpty_MSB 0x1E +#define QIB_7322_TXEStatus_0_RmFifoEmpty_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL15_LSB 0xF +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL15_MSB 0xF +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL15_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL7_LSB 0x7 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL7_MSB 0x7 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL7_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL6_LSB 0x6 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL6_MSB 0x6 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL6_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL5_LSB 0x5 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL5_MSB 0x5 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL5_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL4_LSB 0x4 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL4_MSB 0x4 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL4_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL3_LSB 0x3 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL3_MSB 0x3 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL3_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL2_LSB 0x2 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL2_MSB 0x2 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL2_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL1_LSB 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL1_MSB 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL1_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL0_LSB 0x0 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL0_MSB 0x0 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL0_RMASK 0x1 + +#define QIB_7322_RcvCtrl_0_OFFS 0x1100 +#define QIB_7322_RcvCtrl_0_DEF 0x0000000000000000 +#define QIB_7322_RcvCtrl_0_RcvResetCredit_LSB 0x2A +#define QIB_7322_RcvCtrl_0_RcvResetCredit_MSB 0x2A +#define QIB_7322_RcvCtrl_0_RcvResetCredit_RMASK 0x1 +#define QIB_7322_RcvCtrl_0_RcvPartitionKeyDisable_LSB 0x29 +#define QIB_7322_RcvCtrl_0_RcvPartitionKeyDisable_MSB 0x29 +#define QIB_7322_RcvCtrl_0_RcvPartitionKeyDisable_RMASK 0x1 +#define QIB_7322_RcvCtrl_0_RcvQPMapEnable_LSB 0x28 +#define QIB_7322_RcvCtrl_0_RcvQPMapEnable_MSB 0x28 +#define QIB_7322_RcvCtrl_0_RcvQPMapEnable_RMASK 0x1 +#define QIB_7322_RcvCtrl_0_RcvIBPortEnable_LSB 0x27 +#define QIB_7322_RcvCtrl_0_RcvIBPortEnable_MSB 0x27 +#define QIB_7322_RcvCtrl_0_RcvIBPortEnable_RMASK 0x1 +#define QIB_7322_RcvCtrl_0_ContextEnableUser_LSB 0x2 +#define QIB_7322_RcvCtrl_0_ContextEnableUser_MSB 0x11 +#define QIB_7322_RcvCtrl_0_ContextEnableUser_RMASK 0xFFFF +#define QIB_7322_RcvCtrl_0_ContextEnableKernel_LSB 0x0 +#define QIB_7322_RcvCtrl_0_ContextEnableKernel_MSB 0x0 +#define QIB_7322_RcvCtrl_0_ContextEnableKernel_RMASK 0x1 + +#define QIB_7322_RcvBTHQP_0_OFFS 0x1108 +#define QIB_7322_RcvBTHQP_0_DEF 0x0000000000000000 +#define QIB_7322_RcvBTHQP_0_RcvBTHQP_LSB 0x0 +#define QIB_7322_RcvBTHQP_0_RcvBTHQP_MSB 0x17 +#define QIB_7322_RcvBTHQP_0_RcvBTHQP_RMASK 0xFFFFFF + +#define QIB_7322_RcvQPMapTableA_0_OFFS 0x1110 +#define QIB_7322_RcvQPMapTableA_0_DEF 0x0000000000000000 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext5_LSB 0x19 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext5_MSB 0x1D +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext5_RMASK 0x1F +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext4_LSB 0x14 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext4_MSB 0x18 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext4_RMASK 0x1F +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext3_LSB 0xF +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext3_MSB 0x13 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext3_RMASK 0x1F +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext2_LSB 0xA +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext2_MSB 0xE +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext2_RMASK 0x1F +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext1_LSB 0x5 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext1_MSB 0x9 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext1_RMASK 0x1F +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext0_LSB 0x0 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext0_MSB 0x4 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext0_RMASK 0x1F + +#define QIB_7322_RcvQPMapTableB_0_OFFS 0x1118 +#define QIB_7322_RcvQPMapTableB_0_DEF 0x0000000000000000 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext11_LSB 0x19 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext11_MSB 0x1D +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext11_RMASK 0x1F +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext10_LSB 0x14 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext10_MSB 0x18 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext10_RMASK 0x1F +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext9_LSB 0xF +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext9_MSB 0x13 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext9_RMASK 0x1F +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext8_LSB 0xA +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext8_MSB 0xE +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext8_RMASK 0x1F +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext7_LSB 0x5 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext7_MSB 0x9 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext7_RMASK 0x1F +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext6_LSB 0x0 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext6_MSB 0x4 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext6_RMASK 0x1F + +#define QIB_7322_RcvQPMapTableC_0_OFFS 0x1120 +#define QIB_7322_RcvQPMapTableC_0_DEF 0x0000000000000000 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext17_LSB 0x19 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext17_MSB 0x1D +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext17_RMASK 0x1F +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext16_LSB 0x14 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext16_MSB 0x18 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext16_RMASK 0x1F +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext15_LSB 0xF +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext15_MSB 0x13 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext15_RMASK 0x1F +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext14_LSB 0xA +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext14_MSB 0xE +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext14_RMASK 0x1F +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext13_LSB 0x5 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext13_MSB 0x9 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext13_RMASK 0x1F +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext12_LSB 0x0 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext12_MSB 0x4 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext12_RMASK 0x1F + +#define QIB_7322_RcvQPMapTableD_0_OFFS 0x1128 +#define QIB_7322_RcvQPMapTableD_0_DEF 0x0000000000000000 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext23_LSB 0x19 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext23_MSB 0x1D +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext23_RMASK 0x1F +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext22_LSB 0x14 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext22_MSB 0x18 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext22_RMASK 0x1F +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext21_LSB 0xF +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext21_MSB 0x13 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext21_RMASK 0x1F +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext20_LSB 0xA +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext20_MSB 0xE +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext20_RMASK 0x1F +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext19_LSB 0x5 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext19_MSB 0x9 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext19_RMASK 0x1F +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext18_LSB 0x0 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext18_MSB 0x4 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext18_RMASK 0x1F + +#define QIB_7322_RcvQPMapTableE_0_OFFS 0x1130 +#define QIB_7322_RcvQPMapTableE_0_DEF 0x0000000000000000 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext29_LSB 0x19 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext29_MSB 0x1D +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext29_RMASK 0x1F +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext28_LSB 0x14 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext28_MSB 0x18 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext28_RMASK 0x1F +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext27_LSB 0xF +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext27_MSB 0x13 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext27_RMASK 0x1F +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext26_LSB 0xA +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext26_MSB 0xE +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext26_RMASK 0x1F +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext25_LSB 0x5 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext25_MSB 0x9 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext25_RMASK 0x1F +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext24_LSB 0x0 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext24_MSB 0x4 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext24_RMASK 0x1F + +#define QIB_7322_RcvQPMapTableF_0_OFFS 0x1138 +#define QIB_7322_RcvQPMapTableF_0_DEF 0x0000000000000000 +#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext31_LSB 0x5 +#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext31_MSB 0x9 +#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext31_RMASK 0x1F +#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext30_LSB 0x0 +#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext30_MSB 0x4 +#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext30_RMASK 0x1F + +#define QIB_7322_PSStat_0_OFFS 0x1140 +#define QIB_7322_PSStat_0_DEF 0x0000000000000000 + +#define QIB_7322_PSStart_0_OFFS 0x1148 +#define QIB_7322_PSStart_0_DEF 0x0000000000000000 + +#define QIB_7322_PSInterval_0_OFFS 0x1150 +#define QIB_7322_PSInterval_0_DEF 0x0000000000000000 + +#define QIB_7322_RcvStatus_0_OFFS 0x1160 +#define QIB_7322_RcvStatus_0_DEF 0x0000000000000000 +#define QIB_7322_RcvStatus_0_DmaeqBlockingContext_LSB 0x1 +#define QIB_7322_RcvStatus_0_DmaeqBlockingContext_MSB 0x5 +#define QIB_7322_RcvStatus_0_DmaeqBlockingContext_RMASK 0x1F +#define QIB_7322_RcvStatus_0_RxPktInProgress_LSB 0x0 +#define QIB_7322_RcvStatus_0_RxPktInProgress_MSB 0x0 +#define QIB_7322_RcvStatus_0_RxPktInProgress_RMASK 0x1 + +#define QIB_7322_RcvPartitionKey_0_OFFS 0x1168 +#define QIB_7322_RcvPartitionKey_0_DEF 0x0000000000000000 + +#define QIB_7322_RcvQPMulticastContext_0_OFFS 0x1170 +#define QIB_7322_RcvQPMulticastContext_0_DEF 0x0000000000000000 +#define QIB_7322_RcvQPMulticastContext_0_RcvQpMcContext_LSB 0x0 +#define QIB_7322_RcvQPMulticastContext_0_RcvQpMcContext_MSB 0x4 +#define QIB_7322_RcvQPMulticastContext_0_RcvQpMcContext_RMASK 0x1F + +#define QIB_7322_RcvPktLEDCnt_0_OFFS 0x1178 +#define QIB_7322_RcvPktLEDCnt_0_DEF 0x0000000000000000 +#define QIB_7322_RcvPktLEDCnt_0_ONperiod_LSB 0x20 +#define QIB_7322_RcvPktLEDCnt_0_ONperiod_MSB 0x3F +#define QIB_7322_RcvPktLEDCnt_0_ONperiod_RMASK 0xFFFFFFFF +#define QIB_7322_RcvPktLEDCnt_0_OFFperiod_LSB 0x0 +#define QIB_7322_RcvPktLEDCnt_0_OFFperiod_MSB 0x1F +#define QIB_7322_RcvPktLEDCnt_0_OFFperiod_RMASK 0xFFFFFFFF + +#define QIB_7322_SendDmaIdleCnt_0_OFFS 0x1180 +#define QIB_7322_SendDmaIdleCnt_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaIdleCnt_0_SendDmaIdleCnt_LSB 0x0 +#define QIB_7322_SendDmaIdleCnt_0_SendDmaIdleCnt_MSB 0xF +#define QIB_7322_SendDmaIdleCnt_0_SendDmaIdleCnt_RMASK 0xFFFF + +#define QIB_7322_SendDmaReloadCnt_0_OFFS 0x1188 +#define QIB_7322_SendDmaReloadCnt_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaReloadCnt_0_SendDmaReloadCnt_LSB 0x0 +#define QIB_7322_SendDmaReloadCnt_0_SendDmaReloadCnt_MSB 0xF +#define QIB_7322_SendDmaReloadCnt_0_SendDmaReloadCnt_RMASK 0xFFFF + +#define QIB_7322_SendDmaDescCnt_0_OFFS 0x1190 +#define QIB_7322_SendDmaDescCnt_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaDescCnt_0_SendDmaDescCnt_LSB 0x0 +#define QIB_7322_SendDmaDescCnt_0_SendDmaDescCnt_MSB 0xF +#define QIB_7322_SendDmaDescCnt_0_SendDmaDescCnt_RMASK 0xFFFF + +#define QIB_7322_SendCtrl_0_OFFS 0x11C0 +#define QIB_7322_SendCtrl_0_DEF 0x0000000000000000 +#define QIB_7322_SendCtrl_0_IBVLArbiterEn_LSB 0xF +#define QIB_7322_SendCtrl_0_IBVLArbiterEn_MSB 0xF +#define QIB_7322_SendCtrl_0_IBVLArbiterEn_RMASK 0x1 +#define QIB_7322_SendCtrl_0_TxeDrainRmFifo_LSB 0xE +#define QIB_7322_SendCtrl_0_TxeDrainRmFifo_MSB 0xE +#define QIB_7322_SendCtrl_0_TxeDrainRmFifo_RMASK 0x1 +#define QIB_7322_SendCtrl_0_TxeDrainLaFifo_LSB 0xD +#define QIB_7322_SendCtrl_0_TxeDrainLaFifo_MSB 0xD +#define QIB_7322_SendCtrl_0_TxeDrainLaFifo_RMASK 0x1 +#define QIB_7322_SendCtrl_0_SDmaHalt_LSB 0xC +#define QIB_7322_SendCtrl_0_SDmaHalt_MSB 0xC +#define QIB_7322_SendCtrl_0_SDmaHalt_RMASK 0x1 +#define QIB_7322_SendCtrl_0_SDmaEnable_LSB 0xB +#define QIB_7322_SendCtrl_0_SDmaEnable_MSB 0xB +#define QIB_7322_SendCtrl_0_SDmaEnable_RMASK 0x1 +#define QIB_7322_SendCtrl_0_SDmaSingleDescriptor_LSB 0xA +#define QIB_7322_SendCtrl_0_SDmaSingleDescriptor_MSB 0xA +#define QIB_7322_SendCtrl_0_SDmaSingleDescriptor_RMASK 0x1 +#define QIB_7322_SendCtrl_0_SDmaIntEnable_LSB 0x9 +#define QIB_7322_SendCtrl_0_SDmaIntEnable_MSB 0x9 +#define QIB_7322_SendCtrl_0_SDmaIntEnable_RMASK 0x1 +#define QIB_7322_SendCtrl_0_SDmaCleanup_LSB 0x8 +#define QIB_7322_SendCtrl_0_SDmaCleanup_MSB 0x8 +#define QIB_7322_SendCtrl_0_SDmaCleanup_RMASK 0x1 +#define QIB_7322_SendCtrl_0_ForceCreditUpToDate_LSB 0x7 +#define QIB_7322_SendCtrl_0_ForceCreditUpToDate_MSB 0x7 +#define QIB_7322_SendCtrl_0_ForceCreditUpToDate_RMASK 0x1 +#define QIB_7322_SendCtrl_0_SendEnable_LSB 0x3 +#define QIB_7322_SendCtrl_0_SendEnable_MSB 0x3 +#define QIB_7322_SendCtrl_0_SendEnable_RMASK 0x1 +#define QIB_7322_SendCtrl_0_TxeBypassIbc_LSB 0x1 +#define QIB_7322_SendCtrl_0_TxeBypassIbc_MSB 0x1 +#define QIB_7322_SendCtrl_0_TxeBypassIbc_RMASK 0x1 +#define QIB_7322_SendCtrl_0_TxeAbortIbc_LSB 0x0 +#define QIB_7322_SendCtrl_0_TxeAbortIbc_MSB 0x0 +#define QIB_7322_SendCtrl_0_TxeAbortIbc_RMASK 0x1 + +#define QIB_7322_SendDmaBase_0_OFFS 0x11F8 +#define QIB_7322_SendDmaBase_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaBase_0_SendDmaBase_LSB 0x0 +#define QIB_7322_SendDmaBase_0_SendDmaBase_MSB 0x2F +#define QIB_7322_SendDmaBase_0_SendDmaBase_RMASK 0xFFFFFFFFFFFF + +#define QIB_7322_SendDmaLenGen_0_OFFS 0x1200 +#define QIB_7322_SendDmaLenGen_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaLenGen_0_Generation_LSB 0x10 +#define QIB_7322_SendDmaLenGen_0_Generation_MSB 0x12 +#define QIB_7322_SendDmaLenGen_0_Generation_RMASK 0x7 +#define QIB_7322_SendDmaLenGen_0_Length_LSB 0x0 +#define QIB_7322_SendDmaLenGen_0_Length_MSB 0xF +#define QIB_7322_SendDmaLenGen_0_Length_RMASK 0xFFFF + +#define QIB_7322_SendDmaTail_0_OFFS 0x1208 +#define QIB_7322_SendDmaTail_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaTail_0_SendDmaTail_LSB 0x0 +#define QIB_7322_SendDmaTail_0_SendDmaTail_MSB 0xF +#define QIB_7322_SendDmaTail_0_SendDmaTail_RMASK 0xFFFF + +#define QIB_7322_SendDmaHead_0_OFFS 0x1210 +#define QIB_7322_SendDmaHead_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaHead_0_InternalSendDmaHead_LSB 0x20 +#define QIB_7322_SendDmaHead_0_InternalSendDmaHead_MSB 0x2F +#define QIB_7322_SendDmaHead_0_InternalSendDmaHead_RMASK 0xFFFF +#define QIB_7322_SendDmaHead_0_SendDmaHead_LSB 0x0 +#define QIB_7322_SendDmaHead_0_SendDmaHead_MSB 0xF +#define QIB_7322_SendDmaHead_0_SendDmaHead_RMASK 0xFFFF + +#define QIB_7322_SendDmaHeadAddr_0_OFFS 0x1218 +#define QIB_7322_SendDmaHeadAddr_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaHeadAddr_0_SendDmaHeadAddr_LSB 0x0 +#define QIB_7322_SendDmaHeadAddr_0_SendDmaHeadAddr_MSB 0x2F +#define QIB_7322_SendDmaHeadAddr_0_SendDmaHeadAddr_RMASK 0xFFFFFFFFFFFF + +#define QIB_7322_SendDmaBufMask0_0_OFFS 0x1220 +#define QIB_7322_SendDmaBufMask0_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaBufMask0_0_BufMask_63_0_LSB 0x0 +#define QIB_7322_SendDmaBufMask0_0_BufMask_63_0_MSB 0x3F +#define QIB_7322_SendDmaBufMask0_0_BufMask_63_0_RMASK 0x0 + +#define QIB_7322_SendDmaStatus_0_OFFS 0x1238 +#define QIB_7322_SendDmaStatus_0_DEF 0x0000000042000000 +#define QIB_7322_SendDmaStatus_0_ScoreBoardDrainInProg_LSB 0x3F +#define QIB_7322_SendDmaStatus_0_ScoreBoardDrainInProg_MSB 0x3F +#define QIB_7322_SendDmaStatus_0_ScoreBoardDrainInProg_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_HaltInProg_LSB 0x3E +#define QIB_7322_SendDmaStatus_0_HaltInProg_MSB 0x3E +#define QIB_7322_SendDmaStatus_0_HaltInProg_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_InternalSDmaHalt_LSB 0x3D +#define QIB_7322_SendDmaStatus_0_InternalSDmaHalt_MSB 0x3D +#define QIB_7322_SendDmaStatus_0_InternalSDmaHalt_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_ScbDescIndex_13_0_LSB 0x2F +#define QIB_7322_SendDmaStatus_0_ScbDescIndex_13_0_MSB 0x3C +#define QIB_7322_SendDmaStatus_0_ScbDescIndex_13_0_RMASK 0x3FFF +#define QIB_7322_SendDmaStatus_0_RpyLowAddr_6_0_LSB 0x28 +#define QIB_7322_SendDmaStatus_0_RpyLowAddr_6_0_MSB 0x2E +#define QIB_7322_SendDmaStatus_0_RpyLowAddr_6_0_RMASK 0x7F +#define QIB_7322_SendDmaStatus_0_RpyTag_7_0_LSB 0x20 +#define QIB_7322_SendDmaStatus_0_RpyTag_7_0_MSB 0x27 +#define QIB_7322_SendDmaStatus_0_RpyTag_7_0_RMASK 0xFF +#define QIB_7322_SendDmaStatus_0_ScbFull_LSB 0x1F +#define QIB_7322_SendDmaStatus_0_ScbFull_MSB 0x1F +#define QIB_7322_SendDmaStatus_0_ScbFull_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_ScbEmpty_LSB 0x1E +#define QIB_7322_SendDmaStatus_0_ScbEmpty_MSB 0x1E +#define QIB_7322_SendDmaStatus_0_ScbEmpty_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_ScbEntryValid_LSB 0x1D +#define QIB_7322_SendDmaStatus_0_ScbEntryValid_MSB 0x1D +#define QIB_7322_SendDmaStatus_0_ScbEntryValid_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_ScbFetchDescFlag_LSB 0x1C +#define QIB_7322_SendDmaStatus_0_ScbFetchDescFlag_MSB 0x1C +#define QIB_7322_SendDmaStatus_0_ScbFetchDescFlag_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_SplFifoReadyToGo_LSB 0x1B +#define QIB_7322_SendDmaStatus_0_SplFifoReadyToGo_MSB 0x1B +#define QIB_7322_SendDmaStatus_0_SplFifoReadyToGo_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_SplFifoDisarmed_LSB 0x1A +#define QIB_7322_SendDmaStatus_0_SplFifoDisarmed_MSB 0x1A +#define QIB_7322_SendDmaStatus_0_SplFifoDisarmed_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_SplFifoEmpty_LSB 0x19 +#define QIB_7322_SendDmaStatus_0_SplFifoEmpty_MSB 0x19 +#define QIB_7322_SendDmaStatus_0_SplFifoEmpty_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_SplFifoFull_LSB 0x18 +#define QIB_7322_SendDmaStatus_0_SplFifoFull_MSB 0x18 +#define QIB_7322_SendDmaStatus_0_SplFifoFull_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_SplFifoBufNum_LSB 0x10 +#define QIB_7322_SendDmaStatus_0_SplFifoBufNum_MSB 0x17 +#define QIB_7322_SendDmaStatus_0_SplFifoBufNum_RMASK 0xFF +#define QIB_7322_SendDmaStatus_0_SplFifoDescIndex_LSB 0x0 +#define QIB_7322_SendDmaStatus_0_SplFifoDescIndex_MSB 0xF +#define QIB_7322_SendDmaStatus_0_SplFifoDescIndex_RMASK 0xFFFF + +#define QIB_7322_SendDmaPriorityThld_0_OFFS 0x1258 +#define QIB_7322_SendDmaPriorityThld_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaPriorityThld_0_PriorityThreshold_LSB 0x0 +#define QIB_7322_SendDmaPriorityThld_0_PriorityThreshold_MSB 0x3 +#define QIB_7322_SendDmaPriorityThld_0_PriorityThreshold_RMASK 0xF + +#define QIB_7322_SendHdrErrSymptom_0_OFFS 0x1260 +#define QIB_7322_SendHdrErrSymptom_0_DEF 0x0000000000000000 +#define QIB_7322_SendHdrErrSymptom_0_NonKeyPacket_LSB 0x6 +#define QIB_7322_SendHdrErrSymptom_0_NonKeyPacket_MSB 0x6 +#define QIB_7322_SendHdrErrSymptom_0_NonKeyPacket_RMASK 0x1 +#define QIB_7322_SendHdrErrSymptom_0_GRHFail_LSB 0x5 +#define QIB_7322_SendHdrErrSymptom_0_GRHFail_MSB 0x5 +#define QIB_7322_SendHdrErrSymptom_0_GRHFail_RMASK 0x1 +#define QIB_7322_SendHdrErrSymptom_0_PkeyFail_LSB 0x4 +#define QIB_7322_SendHdrErrSymptom_0_PkeyFail_MSB 0x4 +#define QIB_7322_SendHdrErrSymptom_0_PkeyFail_RMASK 0x1 +#define QIB_7322_SendHdrErrSymptom_0_QPFail_LSB 0x3 +#define QIB_7322_SendHdrErrSymptom_0_QPFail_MSB 0x3 +#define QIB_7322_SendHdrErrSymptom_0_QPFail_RMASK 0x1 +#define QIB_7322_SendHdrErrSymptom_0_SLIDFail_LSB 0x2 +#define QIB_7322_SendHdrErrSymptom_0_SLIDFail_MSB 0x2 +#define QIB_7322_SendHdrErrSymptom_0_SLIDFail_RMASK 0x1 +#define QIB_7322_SendHdrErrSymptom_0_RawIPV6_LSB 0x1 +#define QIB_7322_SendHdrErrSymptom_0_RawIPV6_MSB 0x1 +#define QIB_7322_SendHdrErrSymptom_0_RawIPV6_RMASK 0x1 +#define QIB_7322_SendHdrErrSymptom_0_PacketTooSmall_LSB 0x0 +#define QIB_7322_SendHdrErrSymptom_0_PacketTooSmall_MSB 0x0 +#define QIB_7322_SendHdrErrSymptom_0_PacketTooSmall_RMASK 0x1 + +#define QIB_7322_RxCreditVL0_0_OFFS 0x1280 +#define QIB_7322_RxCreditVL0_0_DEF 0x0000000000000000 +#define QIB_7322_RxCreditVL0_0_RxBufrConsumedVL_LSB 0x10 +#define QIB_7322_RxCreditVL0_0_RxBufrConsumedVL_MSB 0x1B +#define QIB_7322_RxCreditVL0_0_RxBufrConsumedVL_RMASK 0xFFF +#define QIB_7322_RxCreditVL0_0_RxMaxCreditVL_LSB 0x0 +#define QIB_7322_RxCreditVL0_0_RxMaxCreditVL_MSB 0xB +#define QIB_7322_RxCreditVL0_0_RxMaxCreditVL_RMASK 0xFFF + +#define QIB_7322_SendDmaBufUsed0_0_OFFS 0x1480 +#define QIB_7322_SendDmaBufUsed0_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaBufUsed0_0_BufUsed_63_0_LSB 0x0 +#define QIB_7322_SendDmaBufUsed0_0_BufUsed_63_0_MSB 0x3F +#define QIB_7322_SendDmaBufUsed0_0_BufUsed_63_0_RMASK 0x0 + +#define QIB_7322_SendCheckControl_0_OFFS 0x14A8 +#define QIB_7322_SendCheckControl_0_DEF 0x0000000000000000 +#define QIB_7322_SendCheckControl_0_PKey_En_LSB 0x4 +#define QIB_7322_SendCheckControl_0_PKey_En_MSB 0x4 +#define QIB_7322_SendCheckControl_0_PKey_En_RMASK 0x1 +#define QIB_7322_SendCheckControl_0_BTHQP_En_LSB 0x3 +#define QIB_7322_SendCheckControl_0_BTHQP_En_MSB 0x3 +#define QIB_7322_SendCheckControl_0_BTHQP_En_RMASK 0x1 +#define QIB_7322_SendCheckControl_0_SLID_En_LSB 0x2 +#define QIB_7322_SendCheckControl_0_SLID_En_MSB 0x2 +#define QIB_7322_SendCheckControl_0_SLID_En_RMASK 0x1 +#define QIB_7322_SendCheckControl_0_RawIPV6_En_LSB 0x1 +#define QIB_7322_SendCheckControl_0_RawIPV6_En_MSB 0x1 +#define QIB_7322_SendCheckControl_0_RawIPV6_En_RMASK 0x1 +#define QIB_7322_SendCheckControl_0_PacketTooSmall_En_LSB 0x0 +#define QIB_7322_SendCheckControl_0_PacketTooSmall_En_MSB 0x0 +#define QIB_7322_SendCheckControl_0_PacketTooSmall_En_RMASK 0x1 + +#define QIB_7322_SendIBSLIDMask_0_OFFS 0x14B0 +#define QIB_7322_SendIBSLIDMask_0_DEF 0x0000000000000000 +#define QIB_7322_SendIBSLIDMask_0_SendIBSLIDMask_15_0_LSB 0x0 +#define QIB_7322_SendIBSLIDMask_0_SendIBSLIDMask_15_0_MSB 0xF +#define QIB_7322_SendIBSLIDMask_0_SendIBSLIDMask_15_0_RMASK 0xFFFF + +#define QIB_7322_SendIBSLIDAssign_0_OFFS 0x14B8 +#define QIB_7322_SendIBSLIDAssign_0_DEF 0x0000000000000000 +#define QIB_7322_SendIBSLIDAssign_0_SendIBSLIDAssign_15_0_LSB 0x0 +#define QIB_7322_SendIBSLIDAssign_0_SendIBSLIDAssign_15_0_MSB 0xF +#define QIB_7322_SendIBSLIDAssign_0_SendIBSLIDAssign_15_0_RMASK 0xFFFF + +#define QIB_7322_IBCStatusA_0_OFFS 0x1540 +#define QIB_7322_IBCStatusA_0_DEF 0x0000000000000X02 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL7_LSB 0x27 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL7_MSB 0x27 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL7_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL6_LSB 0x26 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL6_MSB 0x26 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL6_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL5_LSB 0x25 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL5_MSB 0x25 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL5_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL4_LSB 0x24 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL4_MSB 0x24 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL4_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL3_LSB 0x23 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL3_MSB 0x23 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL3_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL2_LSB 0x22 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL2_MSB 0x22 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL2_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL1_LSB 0x21 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL1_MSB 0x21 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL1_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL0_LSB 0x20 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL0_MSB 0x20 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL0_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxReady_LSB 0x1E +#define QIB_7322_IBCStatusA_0_TxReady_MSB 0x1E +#define QIB_7322_IBCStatusA_0_TxReady_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_LinkSpeedQDR_LSB 0x1D +#define QIB_7322_IBCStatusA_0_LinkSpeedQDR_MSB 0x1D +#define QIB_7322_IBCStatusA_0_LinkSpeedQDR_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_ScrambleCapRemote_LSB 0xF +#define QIB_7322_IBCStatusA_0_ScrambleCapRemote_MSB 0xF +#define QIB_7322_IBCStatusA_0_ScrambleCapRemote_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_ScrambleEn_LSB 0xE +#define QIB_7322_IBCStatusA_0_ScrambleEn_MSB 0xE +#define QIB_7322_IBCStatusA_0_ScrambleEn_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_IBTxLaneReversed_LSB 0xD +#define QIB_7322_IBCStatusA_0_IBTxLaneReversed_MSB 0xD +#define QIB_7322_IBCStatusA_0_IBTxLaneReversed_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_IBRxLaneReversed_LSB 0xC +#define QIB_7322_IBCStatusA_0_IBRxLaneReversed_MSB 0xC +#define QIB_7322_IBCStatusA_0_IBRxLaneReversed_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_DDS_RXEQ_FAIL_LSB 0xA +#define QIB_7322_IBCStatusA_0_DDS_RXEQ_FAIL_MSB 0xA +#define QIB_7322_IBCStatusA_0_DDS_RXEQ_FAIL_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_LinkWidthActive_LSB 0x9 +#define QIB_7322_IBCStatusA_0_LinkWidthActive_MSB 0x9 +#define QIB_7322_IBCStatusA_0_LinkWidthActive_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_LinkSpeedActive_LSB 0x8 +#define QIB_7322_IBCStatusA_0_LinkSpeedActive_MSB 0x8 +#define QIB_7322_IBCStatusA_0_LinkSpeedActive_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_LinkState_LSB 0x5 +#define QIB_7322_IBCStatusA_0_LinkState_MSB 0x7 +#define QIB_7322_IBCStatusA_0_LinkState_RMASK 0x7 +#define QIB_7322_IBCStatusA_0_LinkTrainingState_LSB 0x0 +#define QIB_7322_IBCStatusA_0_LinkTrainingState_MSB 0x4 +#define QIB_7322_IBCStatusA_0_LinkTrainingState_RMASK 0x1F + +#define QIB_7322_IBCStatusB_0_OFFS 0x1548 +#define QIB_7322_IBCStatusB_0_DEF 0x00000000XXXXXXXX +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_debug_LSB 0x27 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_debug_MSB 0x27 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_debug_RMASK 0x1 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_reached_threshold_LSB 0x26 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_reached_threshold_MSB 0x26 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_reached_threshold_RMASK 0x1 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_started_LSB 0x25 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_started_MSB 0x25 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_started_RMASK 0x1 +#define QIB_7322_IBCStatusB_0_heartbeat_timed_out_LSB 0x24 +#define QIB_7322_IBCStatusB_0_heartbeat_timed_out_MSB 0x24 +#define QIB_7322_IBCStatusB_0_heartbeat_timed_out_RMASK 0x1 +#define QIB_7322_IBCStatusB_0_heartbeat_crosstalk_LSB 0x20 +#define QIB_7322_IBCStatusB_0_heartbeat_crosstalk_MSB 0x23 +#define QIB_7322_IBCStatusB_0_heartbeat_crosstalk_RMASK 0xF +#define QIB_7322_IBCStatusB_0_RxEqLocalDevice_LSB 0x1E +#define QIB_7322_IBCStatusB_0_RxEqLocalDevice_MSB 0x1F +#define QIB_7322_IBCStatusB_0_RxEqLocalDevice_RMASK 0x3 +#define QIB_7322_IBCStatusB_0_ReqDDSLocalFromRmt_LSB 0x1A +#define QIB_7322_IBCStatusB_0_ReqDDSLocalFromRmt_MSB 0x1D +#define QIB_7322_IBCStatusB_0_ReqDDSLocalFromRmt_RMASK 0xF +#define QIB_7322_IBCStatusB_0_LinkRoundTripLatency_LSB 0x0 +#define QIB_7322_IBCStatusB_0_LinkRoundTripLatency_MSB 0x19 +#define QIB_7322_IBCStatusB_0_LinkRoundTripLatency_RMASK 0x3FFFFFF + +#define QIB_7322_IBCCtrlA_0_OFFS 0x1560 +#define QIB_7322_IBCCtrlA_0_DEF 0x0000000000000000 +#define QIB_7322_IBCCtrlA_0_Loopback_LSB 0x3F +#define QIB_7322_IBCCtrlA_0_Loopback_MSB 0x3F +#define QIB_7322_IBCCtrlA_0_Loopback_RMASK 0x1 +#define QIB_7322_IBCCtrlA_0_LinkDownDefaultState_LSB 0x3E +#define QIB_7322_IBCCtrlA_0_LinkDownDefaultState_MSB 0x3E +#define QIB_7322_IBCCtrlA_0_LinkDownDefaultState_RMASK 0x1 +#define QIB_7322_IBCCtrlA_0_IBLinkEn_LSB 0x3D +#define QIB_7322_IBCCtrlA_0_IBLinkEn_MSB 0x3D +#define QIB_7322_IBCCtrlA_0_IBLinkEn_RMASK 0x1 +#define QIB_7322_IBCCtrlA_0_IBStatIntReductionEn_LSB 0x3C +#define QIB_7322_IBCCtrlA_0_IBStatIntReductionEn_MSB 0x3C +#define QIB_7322_IBCCtrlA_0_IBStatIntReductionEn_RMASK 0x1 +#define QIB_7322_IBCCtrlA_0_NumVLane_LSB 0x30 +#define QIB_7322_IBCCtrlA_0_NumVLane_MSB 0x32 +#define QIB_7322_IBCCtrlA_0_NumVLane_RMASK 0x7 +#define QIB_7322_IBCCtrlA_0_OverrunThreshold_LSB 0x24 +#define QIB_7322_IBCCtrlA_0_OverrunThreshold_MSB 0x27 +#define QIB_7322_IBCCtrlA_0_OverrunThreshold_RMASK 0xF +#define QIB_7322_IBCCtrlA_0_PhyerrThreshold_LSB 0x20 +#define QIB_7322_IBCCtrlA_0_PhyerrThreshold_MSB 0x23 +#define QIB_7322_IBCCtrlA_0_PhyerrThreshold_RMASK 0xF +#define QIB_7322_IBCCtrlA_0_MaxPktLen_LSB 0x15 +#define QIB_7322_IBCCtrlA_0_MaxPktLen_MSB 0x1F +#define QIB_7322_IBCCtrlA_0_MaxPktLen_RMASK 0x7FF +#define QIB_7322_IBCCtrlA_0_LinkCmd_LSB 0x13 +#define QIB_7322_IBCCtrlA_0_LinkCmd_MSB 0x14 +#define QIB_7322_IBCCtrlA_0_LinkCmd_RMASK 0x3 +#define QIB_7322_IBCCtrlA_0_LinkInitCmd_LSB 0x10 +#define QIB_7322_IBCCtrlA_0_LinkInitCmd_MSB 0x12 +#define QIB_7322_IBCCtrlA_0_LinkInitCmd_RMASK 0x7 +#define QIB_7322_IBCCtrlA_0_FlowCtrlWaterMark_LSB 0x8 +#define QIB_7322_IBCCtrlA_0_FlowCtrlWaterMark_MSB 0xF +#define QIB_7322_IBCCtrlA_0_FlowCtrlWaterMark_RMASK 0xFF +#define QIB_7322_IBCCtrlA_0_FlowCtrlPeriod_LSB 0x0 +#define QIB_7322_IBCCtrlA_0_FlowCtrlPeriod_MSB 0x7 +#define QIB_7322_IBCCtrlA_0_FlowCtrlPeriod_RMASK 0xFF + +#define QIB_7322_IBCCtrlB_0_OFFS 0x1568 +#define QIB_7322_IBCCtrlB_0_DEF 0x00000000000305FF +#define QIB_7322_IBCCtrlB_0_IB_DLID_MASK_LSB 0x30 +#define QIB_7322_IBCCtrlB_0_IB_DLID_MASK_MSB 0x3F +#define QIB_7322_IBCCtrlB_0_IB_DLID_MASK_RMASK 0xFFFF +#define QIB_7322_IBCCtrlB_0_IB_DLID_LSB 0x20 +#define QIB_7322_IBCCtrlB_0_IB_DLID_MSB 0x2F +#define QIB_7322_IBCCtrlB_0_IB_DLID_RMASK 0xFFFF +#define QIB_7322_IBCCtrlB_0_IB_ENABLE_FILT_DPKT_LSB 0x1B +#define QIB_7322_IBCCtrlB_0_IB_ENABLE_FILT_DPKT_MSB 0x1B +#define QIB_7322_IBCCtrlB_0_IB_ENABLE_FILT_DPKT_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_HRTBT_REQ_LSB 0x1A +#define QIB_7322_IBCCtrlB_0_HRTBT_REQ_MSB 0x1A +#define QIB_7322_IBCCtrlB_0_HRTBT_REQ_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_HRTBT_PORT_LSB 0x12 +#define QIB_7322_IBCCtrlB_0_HRTBT_PORT_MSB 0x19 +#define QIB_7322_IBCCtrlB_0_HRTBT_PORT_RMASK 0xFF +#define QIB_7322_IBCCtrlB_0_HRTBT_AUTO_LSB 0x11 +#define QIB_7322_IBCCtrlB_0_HRTBT_AUTO_MSB 0x11 +#define QIB_7322_IBCCtrlB_0_HRTBT_AUTO_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_HRTBT_ENB_LSB 0x10 +#define QIB_7322_IBCCtrlB_0_HRTBT_ENB_MSB 0x10 +#define QIB_7322_IBCCtrlB_0_HRTBT_ENB_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_SD_DDS_LSB 0xC +#define QIB_7322_IBCCtrlB_0_SD_DDS_MSB 0xF +#define QIB_7322_IBCCtrlB_0_SD_DDS_RMASK 0xF +#define QIB_7322_IBCCtrlB_0_SD_DDSV_LSB 0xB +#define QIB_7322_IBCCtrlB_0_SD_DDSV_MSB 0xB +#define QIB_7322_IBCCtrlB_0_SD_DDSV_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_SD_ADD_ENB_LSB 0xA +#define QIB_7322_IBCCtrlB_0_SD_ADD_ENB_MSB 0xA +#define QIB_7322_IBCCtrlB_0_SD_ADD_ENB_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_SD_RX_EQUAL_ENABLE_LSB 0x9 +#define QIB_7322_IBCCtrlB_0_SD_RX_EQUAL_ENABLE_MSB 0x9 +#define QIB_7322_IBCCtrlB_0_SD_RX_EQUAL_ENABLE_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_IB_LANE_REV_SUPPORTED_LSB 0x8 +#define QIB_7322_IBCCtrlB_0_IB_LANE_REV_SUPPORTED_MSB 0x8 +#define QIB_7322_IBCCtrlB_0_IB_LANE_REV_SUPPORTED_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_IB_POLARITY_REV_SUPP_LSB 0x7 +#define QIB_7322_IBCCtrlB_0_IB_POLARITY_REV_SUPP_MSB 0x7 +#define QIB_7322_IBCCtrlB_0_IB_POLARITY_REV_SUPP_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_IB_NUM_CHANNELS_LSB 0x5 +#define QIB_7322_IBCCtrlB_0_IB_NUM_CHANNELS_MSB 0x6 +#define QIB_7322_IBCCtrlB_0_IB_NUM_CHANNELS_RMASK 0x3 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_QDR_LSB 0x4 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_QDR_MSB 0x4 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_QDR_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_DDR_LSB 0x3 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_DDR_MSB 0x3 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_DDR_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_SDR_LSB 0x2 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_SDR_MSB 0x2 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_SDR_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_LSB 0x1 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_MSB 0x1 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_IB_ENHANCED_MODE_LSB 0x0 +#define QIB_7322_IBCCtrlB_0_IB_ENHANCED_MODE_MSB 0x0 +#define QIB_7322_IBCCtrlB_0_IB_ENHANCED_MODE_RMASK 0x1 + +#define QIB_7322_IBCCtrlC_0_OFFS 0x1570 +#define QIB_7322_IBCCtrlC_0_DEF 0x0000000000000301 +#define QIB_7322_IBCCtrlC_0_IB_BACK_PORCH_LSB 0x5 +#define QIB_7322_IBCCtrlC_0_IB_BACK_PORCH_MSB 0x9 +#define QIB_7322_IBCCtrlC_0_IB_BACK_PORCH_RMASK 0x1F +#define QIB_7322_IBCCtrlC_0_IB_FRONT_PORCH_LSB 0x0 +#define QIB_7322_IBCCtrlC_0_IB_FRONT_PORCH_MSB 0x4 +#define QIB_7322_IBCCtrlC_0_IB_FRONT_PORCH_RMASK 0x1F + +#define QIB_7322_HRTBT_GUID_0_OFFS 0x1588 +#define QIB_7322_HRTBT_GUID_0_DEF 0x0000000000000000 + +#define QIB_7322_IB_SDTEST_IF_TX_0_OFFS 0x1590 +#define QIB_7322_IB_SDTEST_IF_TX_0_DEF 0x0000000000000000 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_RX_CFG_LSB 0x30 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_RX_CFG_MSB 0x3F +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_RX_CFG_RMASK 0xFFFF +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_TX_CFG_LSB 0x20 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_TX_CFG_MSB 0x2F +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_TX_CFG_RMASK 0xFFFF +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_SPEED_LSB 0xD +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_SPEED_MSB 0xF +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_SPEED_RMASK 0x7 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_OPCODE_LSB 0xB +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_OPCODE_MSB 0xC +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_OPCODE_RMASK 0x3 +#define QIB_7322_IB_SDTEST_IF_TX_0_CREDIT_CHANGE_LSB 0x4 +#define QIB_7322_IB_SDTEST_IF_TX_0_CREDIT_CHANGE_MSB 0x4 +#define QIB_7322_IB_SDTEST_IF_TX_0_CREDIT_CHANGE_RMASK 0x1 +#define QIB_7322_IB_SDTEST_IF_TX_0_VL_CAP_LSB 0x2 +#define QIB_7322_IB_SDTEST_IF_TX_0_VL_CAP_MSB 0x3 +#define QIB_7322_IB_SDTEST_IF_TX_0_VL_CAP_RMASK 0x3 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_3_TX_VALID_LSB 0x1 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_3_TX_VALID_MSB 0x1 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_3_TX_VALID_RMASK 0x1 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_T_TX_VALID_LSB 0x0 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_T_TX_VALID_MSB 0x0 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_T_TX_VALID_RMASK 0x1 + +#define QIB_7322_IB_SDTEST_IF_RX_0_OFFS 0x1598 +#define QIB_7322_IB_SDTEST_IF_RX_0_DEF 0x0000000000000000 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_RX_CFG_LSB 0x30 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_RX_CFG_MSB 0x3F +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_RX_CFG_RMASK 0xFFFF +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_TX_CFG_LSB 0x20 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_TX_CFG_MSB 0x2F +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_TX_CFG_RMASK 0xFFFF +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_B_LSB 0x18 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_B_MSB 0x1F +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_B_RMASK 0xFF +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_A_LSB 0x10 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_A_MSB 0x17 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_A_RMASK 0xFF +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_3_RX_VALID_LSB 0x1 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_3_RX_VALID_MSB 0x1 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_3_RX_VALID_RMASK 0x1 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_T_RX_VALID_LSB 0x0 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_T_RX_VALID_MSB 0x0 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_T_RX_VALID_RMASK 0x1 + +#define QIB_7322_IBNCModeCtrl_0_OFFS 0x15B8 +#define QIB_7322_IBNCModeCtrl_0_DEF 0x0000000000000000 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteForce_LSB 0x22 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteForce_MSB 0x22 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteForce_RMASK 0x1 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteMask_LSB 0x21 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteMask_MSB 0x21 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteMask_RMASK 0x1 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapLocal_LSB 0x20 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapLocal_MSB 0x20 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapLocal_RMASK 0x1 +#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS2_LSB 0x11 +#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS2_MSB 0x19 +#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS2_RMASK 0x1FF +#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS1_LSB 0x8 +#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS1_MSB 0x10 +#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS1_RMASK 0x1FF +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_ignore_TSM_on_rx_LSB 0x2 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_ignore_TSM_on_rx_MSB 0x2 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_ignore_TSM_on_rx_RMASK 0x1 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS2_LSB 0x1 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS2_MSB 0x1 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS2_RMASK 0x1 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS1_LSB 0x0 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS1_MSB 0x0 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS1_RMASK 0x1 + +#define QIB_7322_IBSerdesStatus_0_OFFS 0x15D0 +#define QIB_7322_IBSerdesStatus_0_DEF 0x0000000000000000 + +#define QIB_7322_IBPCSConfig_0_OFFS 0x15D8 +#define QIB_7322_IBPCSConfig_0_DEF 0x0000000000000007 +#define QIB_7322_IBPCSConfig_0_link_sync_mask_LSB 0x9 +#define QIB_7322_IBPCSConfig_0_link_sync_mask_MSB 0x12 +#define QIB_7322_IBPCSConfig_0_link_sync_mask_RMASK 0x3FF +#define QIB_7322_IBPCSConfig_0_xcv_rreset_LSB 0x2 +#define QIB_7322_IBPCSConfig_0_xcv_rreset_MSB 0x2 +#define QIB_7322_IBPCSConfig_0_xcv_rreset_RMASK 0x1 +#define QIB_7322_IBPCSConfig_0_xcv_treset_LSB 0x1 +#define QIB_7322_IBPCSConfig_0_xcv_treset_MSB 0x1 +#define QIB_7322_IBPCSConfig_0_xcv_treset_RMASK 0x1 +#define QIB_7322_IBPCSConfig_0_tx_rx_reset_LSB 0x0 +#define QIB_7322_IBPCSConfig_0_tx_rx_reset_MSB 0x0 +#define QIB_7322_IBPCSConfig_0_tx_rx_reset_RMASK 0x1 + +#define QIB_7322_IBSerdesCtrl_0_OFFS 0x15E0 +#define QIB_7322_IBSerdesCtrl_0_DEF 0x0000000000FFA00F +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_QDR_LSB 0x1A +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_QDR_MSB 0x1A +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_QDR_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_DDR_LSB 0x19 +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_DDR_MSB 0x19 +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_DDR_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_SDR_LSB 0x18 +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_SDR_MSB 0x18 +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_SDR_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_CHANNEL_RESET_N_LSB 0x14 +#define QIB_7322_IBSerdesCtrl_0_CHANNEL_RESET_N_MSB 0x17 +#define QIB_7322_IBSerdesCtrl_0_CHANNEL_RESET_N_RMASK 0xF +#define QIB_7322_IBSerdesCtrl_0_CGMODE_LSB 0x10 +#define QIB_7322_IBSerdesCtrl_0_CGMODE_MSB 0x13 +#define QIB_7322_IBSerdesCtrl_0_CGMODE_RMASK 0xF +#define QIB_7322_IBSerdesCtrl_0_IB_LAT_MODE_LSB 0xF +#define QIB_7322_IBSerdesCtrl_0_IB_LAT_MODE_MSB 0xF +#define QIB_7322_IBSerdesCtrl_0_IB_LAT_MODE_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_RXLOSEN_LSB 0xD +#define QIB_7322_IBSerdesCtrl_0_RXLOSEN_MSB 0xD +#define QIB_7322_IBSerdesCtrl_0_RXLOSEN_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_LPEN_LSB 0xC +#define QIB_7322_IBSerdesCtrl_0_LPEN_MSB 0xC +#define QIB_7322_IBSerdesCtrl_0_LPEN_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_PLLPD_LSB 0xB +#define QIB_7322_IBSerdesCtrl_0_PLLPD_MSB 0xB +#define QIB_7322_IBSerdesCtrl_0_PLLPD_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_TXPD_LSB 0xA +#define QIB_7322_IBSerdesCtrl_0_TXPD_MSB 0xA +#define QIB_7322_IBSerdesCtrl_0_TXPD_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_RXPD_LSB 0x9 +#define QIB_7322_IBSerdesCtrl_0_RXPD_MSB 0x9 +#define QIB_7322_IBSerdesCtrl_0_RXPD_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_TXIDLE_LSB 0x8 +#define QIB_7322_IBSerdesCtrl_0_TXIDLE_MSB 0x8 +#define QIB_7322_IBSerdesCtrl_0_TXIDLE_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_CMODE_LSB 0x0 +#define QIB_7322_IBSerdesCtrl_0_CMODE_MSB 0x6 +#define QIB_7322_IBSerdesCtrl_0_CMODE_RMASK 0x7F + +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_OFFS 0x1600 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_DEF 0x0000000000000000 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_tx_override_deemphasis_select_LSB 0x1F +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_tx_override_deemphasis_select_MSB 0x1F +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_tx_override_deemphasis_select_RMASK 0x1 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_reset_tx_deemphasis_override_LSB 0x1E +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_reset_tx_deemphasis_override_MSB 0x1E +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_reset_tx_deemphasis_override_RMASK 0x1 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txampcntl_d2a_LSB 0xE +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txampcntl_d2a_MSB 0x11 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txampcntl_d2a_RMASK 0xF +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txc0_ena_LSB 0x9 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txc0_ena_MSB 0xD +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txc0_ena_RMASK 0x1F +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcp1_ena_LSB 0x5 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcp1_ena_MSB 0x8 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcp1_ena_RMASK 0xF +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_xtra_emph0_LSB 0x3 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_xtra_emph0_MSB 0x4 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_xtra_emph0_RMASK 0x3 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_ena_LSB 0x0 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_ena_MSB 0x2 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_ena_RMASK 0x7 + +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_OFFS 0x1640 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_DEF 0x0000000000000000 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch3_LSB 0x27 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch3_MSB 0x27 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch2_LSB 0x26 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch2_MSB 0x26 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch1_LSB 0x25 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch1_MSB 0x25 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch0_LSB 0x24 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch0_MSB 0x24 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch3_LSB 0x23 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch3_MSB 0x23 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch2_LSB 0x22 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch2_MSB 0x22 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch1_LSB 0x21 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch1_MSB 0x21 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch0_LSB 0x20 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch0_MSB 0x20 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch3_LSB 0x18 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch3_MSB 0x1F +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch3_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch2_LSB 0x10 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch2_MSB 0x17 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch2_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch1_LSB 0x8 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch1_MSB 0xF +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch1_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch0_LSB 0x0 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch0_MSB 0x7 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch0_RMASK 0xFF + +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_OFFS 0x1648 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_DEF 0x0000000000000000 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch3_LSB 0x27 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch3_MSB 0x27 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch2_LSB 0x26 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch2_MSB 0x26 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch1_LSB 0x25 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch1_MSB 0x25 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch0_LSB 0x24 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch0_MSB 0x24 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch3_LSB 0x23 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch3_MSB 0x23 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch2_LSB 0x22 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch2_MSB 0x22 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch1_LSB 0x21 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch1_MSB 0x21 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch0_LSB 0x20 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch0_MSB 0x20 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch3_LSB 0x18 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch3_MSB 0x1F +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch3_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch2_LSB 0x10 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch2_MSB 0x17 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch2_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch1_LSB 0x8 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch1_MSB 0xF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch1_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch0_LSB 0x0 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch0_MSB 0x7 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch0_RMASK 0xFF + +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_OFFS 0x1650 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_DEF 0x0000000000000000 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch3_LSB 0x27 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch3_MSB 0x27 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch2_LSB 0x26 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch2_MSB 0x26 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch1_LSB 0x25 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch1_MSB 0x25 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch0_LSB 0x24 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch0_MSB 0x24 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch3_LSB 0x23 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch3_MSB 0x23 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch2_LSB 0x22 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch2_MSB 0x22 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch1_LSB 0x21 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch1_MSB 0x21 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch0_LSB 0x20 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch0_MSB 0x20 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch3_LSB 0x18 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch3_MSB 0x1F +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch3_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch2_LSB 0x10 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch2_MSB 0x17 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch2_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch1_LSB 0x8 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch1_MSB 0xF +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch1_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch0_LSB 0x0 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch0_MSB 0x7 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch0_RMASK 0xFF + +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_OFFS 0x1658 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_DEF 0x0000000000000000 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch3_LSB 0x27 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch3_MSB 0x27 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch2_LSB 0x26 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch2_MSB 0x26 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch1_LSB 0x25 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch1_MSB 0x25 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch0_LSB 0x24 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch0_MSB 0x24 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch3_LSB 0x23 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch3_MSB 0x23 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch2_LSB 0x22 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch2_MSB 0x22 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch1_LSB 0x21 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch1_MSB 0x21 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch0_LSB 0x20 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch0_MSB 0x20 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch3_LSB 0x18 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch3_MSB 0x1F +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch3_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch2_LSB 0x10 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch2_MSB 0x17 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch2_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch1_LSB 0x8 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch1_MSB 0xF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch1_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch0_LSB 0x0 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch0_MSB 0x7 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch0_RMASK 0xFF + +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_OFFS 0x1660 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_DEF 0x0000000000000000 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch3_LSB 0x27 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch3_MSB 0x27 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch2_LSB 0x26 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch2_MSB 0x26 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch1_LSB 0x25 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch1_MSB 0x25 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch0_LSB 0x24 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch0_MSB 0x24 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch3_LSB 0x23 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch3_MSB 0x23 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch2_LSB 0x22 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch2_MSB 0x22 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch1_LSB 0x21 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch1_MSB 0x21 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch0_LSB 0x20 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch0_MSB 0x20 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch3_LSB 0x18 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch3_MSB 0x1F +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch3_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch2_LSB 0x10 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch2_MSB 0x17 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch2_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch1_LSB 0x8 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch1_MSB 0xF +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch1_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch0_LSB 0x0 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch0_MSB 0x7 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch0_RMASK 0xFF + +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_OFFS 0x1668 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_DEF 0x0000000000000000 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch3_LSB 0x27 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch3_MSB 0x27 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch2_LSB 0x26 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch2_MSB 0x26 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch1_LSB 0x25 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch1_MSB 0x25 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch0_LSB 0x24 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch0_MSB 0x24 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch3_LSB 0x23 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch3_MSB 0x23 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch2_LSB 0x22 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch2_MSB 0x22 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch1_LSB 0x21 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch1_MSB 0x21 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch0_LSB 0x20 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch0_MSB 0x20 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch3_LSB 0x18 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch3_MSB 0x1F +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch3_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch2_LSB 0x10 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch2_MSB 0x17 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch2_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch1_LSB 0x8 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch1_MSB 0xF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch1_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch0_LSB 0x0 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch0_MSB 0x7 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch0_RMASK 0xFF + +#define QIB_7322_ADAPT_DISABLE_TIMER_THRESHOLD_0_OFFS 0x1670 +#define QIB_7322_ADAPT_DISABLE_TIMER_THRESHOLD_0_DEF 0x0000000000000000 + +#define QIB_7322_HighPriorityLimit_0_OFFS 0x1BC0 +#define QIB_7322_HighPriorityLimit_0_DEF 0x0000000000000000 +#define QIB_7322_HighPriorityLimit_0_Limit_LSB 0x0 +#define QIB_7322_HighPriorityLimit_0_Limit_MSB 0x7 +#define QIB_7322_HighPriorityLimit_0_Limit_RMASK 0xFF + +#define QIB_7322_LowPriority0_0_OFFS 0x1C00 +#define QIB_7322_LowPriority0_0_DEF 0x0000000000000000 +#define QIB_7322_LowPriority0_0_VirtualLane_LSB 0x10 +#define QIB_7322_LowPriority0_0_VirtualLane_MSB 0x12 +#define QIB_7322_LowPriority0_0_VirtualLane_RMASK 0x7 +#define QIB_7322_LowPriority0_0_Weight_LSB 0x0 +#define QIB_7322_LowPriority0_0_Weight_MSB 0x7 +#define QIB_7322_LowPriority0_0_Weight_RMASK 0xFF + +#define QIB_7322_HighPriority0_0_OFFS 0x1E00 +#define QIB_7322_HighPriority0_0_DEF 0x0000000000000000 +#define QIB_7322_HighPriority0_0_VirtualLane_LSB 0x10 +#define QIB_7322_HighPriority0_0_VirtualLane_MSB 0x12 +#define QIB_7322_HighPriority0_0_VirtualLane_RMASK 0x7 +#define QIB_7322_HighPriority0_0_Weight_LSB 0x0 +#define QIB_7322_HighPriority0_0_Weight_MSB 0x7 +#define QIB_7322_HighPriority0_0_Weight_RMASK 0xFF + +#define QIB_7322_CntrRegBase_1_OFFS 0x2028 +#define QIB_7322_CntrRegBase_1_DEF 0x0000000000013000 + +#define QIB_7322_RcvQPMulticastContext_1_OFFS 0x2170 + +#define QIB_7322_SendCtrl_1_OFFS 0x21C0 + +#define QIB_7322_SendBufAvail0_OFFS 0x3000 +#define QIB_7322_SendBufAvail0_DEF 0x0000000000000000 +#define QIB_7322_SendBufAvail0_SendBuf_31_0_LSB 0x0 +#define QIB_7322_SendBufAvail0_SendBuf_31_0_MSB 0x3F +#define QIB_7322_SendBufAvail0_SendBuf_31_0_RMASK 0x0 + +#define QIB_7322_MsixTable_OFFS 0x8000 +#define QIB_7322_MsixTable_DEF 0x0000000000000000 + +#define QIB_7322_MsixPba_OFFS 0x9000 +#define QIB_7322_MsixPba_DEF 0x0000000000000000 + +#define QIB_7322_LAMemory_OFFS 0xA000 +#define QIB_7322_LAMemory_DEF 0x0000000000000000 + +#define QIB_7322_LBIntCnt_OFFS 0x11000 +#define QIB_7322_LBIntCnt_DEF 0x0000000000000000 + +#define QIB_7322_LBFlowStallCnt_OFFS 0x11008 +#define QIB_7322_LBFlowStallCnt_DEF 0x0000000000000000 + +#define QIB_7322_RxTIDFullErrCnt_OFFS 0x110D0 +#define QIB_7322_RxTIDFullErrCnt_DEF 0x0000000000000000 + +#define QIB_7322_RxTIDValidErrCnt_OFFS 0x110D8 +#define QIB_7322_RxTIDValidErrCnt_DEF 0x0000000000000000 + +#define QIB_7322_RxP0HdrEgrOvflCnt_OFFS 0x110E8 +#define QIB_7322_RxP0HdrEgrOvflCnt_DEF 0x0000000000000000 + +#define QIB_7322_PcieRetryBufDiagQwordCnt_OFFS 0x111A0 +#define QIB_7322_PcieRetryBufDiagQwordCnt_DEF 0x0000000000000000 + +#define QIB_7322_RxTidFlowDropCnt_OFFS 0x111E0 +#define QIB_7322_RxTidFlowDropCnt_DEF 0x0000000000000000 + +#define QIB_7322_LBIntCnt_0_OFFS 0x12000 +#define QIB_7322_LBIntCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxCreditUpToDateTimeOut_0_OFFS 0x12008 +#define QIB_7322_TxCreditUpToDateTimeOut_0_DEF 0x0000000000000000 + +#define QIB_7322_TxSDmaDescCnt_0_OFFS 0x12010 +#define QIB_7322_TxSDmaDescCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxUnsupVLErrCnt_0_OFFS 0x12018 +#define QIB_7322_TxUnsupVLErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxDataPktCnt_0_OFFS 0x12020 +#define QIB_7322_TxDataPktCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxFlowPktCnt_0_OFFS 0x12028 +#define QIB_7322_TxFlowPktCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxDwordCnt_0_OFFS 0x12030 +#define QIB_7322_TxDwordCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxLenErrCnt_0_OFFS 0x12038 +#define QIB_7322_TxLenErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxMaxMinLenErrCnt_0_OFFS 0x12040 +#define QIB_7322_TxMaxMinLenErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxUnderrunCnt_0_OFFS 0x12048 +#define QIB_7322_TxUnderrunCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxFlowStallCnt_0_OFFS 0x12050 +#define QIB_7322_TxFlowStallCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxDroppedPktCnt_0_OFFS 0x12058 +#define QIB_7322_TxDroppedPktCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxDroppedPktCnt_0_OFFS 0x12060 +#define QIB_7322_RxDroppedPktCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxDataPktCnt_0_OFFS 0x12068 +#define QIB_7322_RxDataPktCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxFlowPktCnt_0_OFFS 0x12070 +#define QIB_7322_RxFlowPktCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxDwordCnt_0_OFFS 0x12078 +#define QIB_7322_RxDwordCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxLenErrCnt_0_OFFS 0x12080 +#define QIB_7322_RxLenErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxMaxMinLenErrCnt_0_OFFS 0x12088 +#define QIB_7322_RxMaxMinLenErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxICRCErrCnt_0_OFFS 0x12090 +#define QIB_7322_RxICRCErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxVCRCErrCnt_0_OFFS 0x12098 +#define QIB_7322_RxVCRCErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxFlowCtrlViolCnt_0_OFFS 0x120A0 +#define QIB_7322_RxFlowCtrlViolCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxVersionErrCnt_0_OFFS 0x120A8 +#define QIB_7322_RxVersionErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxLinkMalformCnt_0_OFFS 0x120B0 +#define QIB_7322_RxLinkMalformCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxEBPCnt_0_OFFS 0x120B8 +#define QIB_7322_RxEBPCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxLPCRCErrCnt_0_OFFS 0x120C0 +#define QIB_7322_RxLPCRCErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxBufOvflCnt_0_OFFS 0x120C8 +#define QIB_7322_RxBufOvflCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxLenTruncateCnt_0_OFFS 0x120D0 +#define QIB_7322_RxLenTruncateCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxPKeyMismatchCnt_0_OFFS 0x120E0 +#define QIB_7322_RxPKeyMismatchCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_IBLinkDownedCnt_0_OFFS 0x12180 +#define QIB_7322_IBLinkDownedCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_IBSymbolErrCnt_0_OFFS 0x12188 +#define QIB_7322_IBSymbolErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_IBStatusChangeCnt_0_OFFS 0x12190 +#define QIB_7322_IBStatusChangeCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_IBLinkErrRecoveryCnt_0_OFFS 0x12198 +#define QIB_7322_IBLinkErrRecoveryCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_ExcessBufferOvflCnt_0_OFFS 0x121A8 +#define QIB_7322_ExcessBufferOvflCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_LocalLinkIntegrityErrCnt_0_OFFS 0x121B0 +#define QIB_7322_LocalLinkIntegrityErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxVlErrCnt_0_OFFS 0x121B8 +#define QIB_7322_RxVlErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxDlidFltrCnt_0_OFFS 0x121C0 +#define QIB_7322_RxDlidFltrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxVL15DroppedPktCnt_0_OFFS 0x121C8 +#define QIB_7322_RxVL15DroppedPktCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxOtherLocalPhyErrCnt_0_OFFS 0x121D0 +#define QIB_7322_RxOtherLocalPhyErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxQPInvalidContextCnt_0_OFFS 0x121D8 +#define QIB_7322_RxQPInvalidContextCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxHeadersErrCnt_0_OFFS 0x121F8 +#define QIB_7322_TxHeadersErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_PSRcvDataCount_0_OFFS 0x12218 +#define QIB_7322_PSRcvDataCount_0_DEF 0x0000000000000000 + +#define QIB_7322_PSRcvPktsCount_0_OFFS 0x12220 +#define QIB_7322_PSRcvPktsCount_0_DEF 0x0000000000000000 + +#define QIB_7322_PSXmitDataCount_0_OFFS 0x12228 +#define QIB_7322_PSXmitDataCount_0_DEF 0x0000000000000000 + +#define QIB_7322_PSXmitPktsCount_0_OFFS 0x12230 +#define QIB_7322_PSXmitPktsCount_0_DEF 0x0000000000000000 + +#define QIB_7322_PSXmitWaitCount_0_OFFS 0x12238 +#define QIB_7322_PSXmitWaitCount_0_DEF 0x0000000000000000 + +#define QIB_7322_LBIntCnt_1_OFFS 0x13000 +#define QIB_7322_LBIntCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxCreditUpToDateTimeOut_1_OFFS 0x13008 +#define QIB_7322_TxCreditUpToDateTimeOut_1_DEF 0x0000000000000000 + +#define QIB_7322_TxSDmaDescCnt_1_OFFS 0x13010 +#define QIB_7322_TxSDmaDescCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxUnsupVLErrCnt_1_OFFS 0x13018 +#define QIB_7322_TxUnsupVLErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxDataPktCnt_1_OFFS 0x13020 +#define QIB_7322_TxDataPktCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxFlowPktCnt_1_OFFS 0x13028 +#define QIB_7322_TxFlowPktCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxDwordCnt_1_OFFS 0x13030 +#define QIB_7322_TxDwordCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxLenErrCnt_1_OFFS 0x13038 +#define QIB_7322_TxLenErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxMaxMinLenErrCnt_1_OFFS 0x13040 +#define QIB_7322_TxMaxMinLenErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxUnderrunCnt_1_OFFS 0x13048 +#define QIB_7322_TxUnderrunCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxFlowStallCnt_1_OFFS 0x13050 +#define QIB_7322_TxFlowStallCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxDroppedPktCnt_1_OFFS 0x13058 +#define QIB_7322_TxDroppedPktCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxDroppedPktCnt_1_OFFS 0x13060 +#define QIB_7322_RxDroppedPktCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxDataPktCnt_1_OFFS 0x13068 +#define QIB_7322_RxDataPktCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxFlowPktCnt_1_OFFS 0x13070 +#define QIB_7322_RxFlowPktCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxDwordCnt_1_OFFS 0x13078 +#define QIB_7322_RxDwordCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxLenErrCnt_1_OFFS 0x13080 +#define QIB_7322_RxLenErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxMaxMinLenErrCnt_1_OFFS 0x13088 +#define QIB_7322_RxMaxMinLenErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxICRCErrCnt_1_OFFS 0x13090 +#define QIB_7322_RxICRCErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxVCRCErrCnt_1_OFFS 0x13098 +#define QIB_7322_RxVCRCErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxFlowCtrlViolCnt_1_OFFS 0x130A0 +#define QIB_7322_RxFlowCtrlViolCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxVersionErrCnt_1_OFFS 0x130A8 +#define QIB_7322_RxVersionErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxLinkMalformCnt_1_OFFS 0x130B0 +#define QIB_7322_RxLinkMalformCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxEBPCnt_1_OFFS 0x130B8 +#define QIB_7322_RxEBPCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxLPCRCErrCnt_1_OFFS 0x130C0 +#define QIB_7322_RxLPCRCErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxBufOvflCnt_1_OFFS 0x130C8 +#define QIB_7322_RxBufOvflCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxLenTruncateCnt_1_OFFS 0x130D0 +#define QIB_7322_RxLenTruncateCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxPKeyMismatchCnt_1_OFFS 0x130E0 +#define QIB_7322_RxPKeyMismatchCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_IBLinkDownedCnt_1_OFFS 0x13180 +#define QIB_7322_IBLinkDownedCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_IBSymbolErrCnt_1_OFFS 0x13188 +#define QIB_7322_IBSymbolErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_IBStatusChangeCnt_1_OFFS 0x13190 +#define QIB_7322_IBStatusChangeCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_IBLinkErrRecoveryCnt_1_OFFS 0x13198 +#define QIB_7322_IBLinkErrRecoveryCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_ExcessBufferOvflCnt_1_OFFS 0x131A8 +#define QIB_7322_ExcessBufferOvflCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_LocalLinkIntegrityErrCnt_1_OFFS 0x131B0 +#define QIB_7322_LocalLinkIntegrityErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxVlErrCnt_1_OFFS 0x131B8 +#define QIB_7322_RxVlErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxDlidFltrCnt_1_OFFS 0x131C0 +#define QIB_7322_RxDlidFltrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxVL15DroppedPktCnt_1_OFFS 0x131C8 +#define QIB_7322_RxVL15DroppedPktCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxOtherLocalPhyErrCnt_1_OFFS 0x131D0 +#define QIB_7322_RxOtherLocalPhyErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxQPInvalidContextCnt_1_OFFS 0x131D8 +#define QIB_7322_RxQPInvalidContextCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxHeadersErrCnt_1_OFFS 0x131F8 +#define QIB_7322_TxHeadersErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_PSRcvDataCount_1_OFFS 0x13218 +#define QIB_7322_PSRcvDataCount_1_DEF 0x0000000000000000 + +#define QIB_7322_PSRcvPktsCount_1_OFFS 0x13220 +#define QIB_7322_PSRcvPktsCount_1_DEF 0x0000000000000000 + +#define QIB_7322_PSXmitDataCount_1_OFFS 0x13228 +#define QIB_7322_PSXmitDataCount_1_DEF 0x0000000000000000 + +#define QIB_7322_PSXmitPktsCount_1_OFFS 0x13230 +#define QIB_7322_PSXmitPktsCount_1_DEF 0x0000000000000000 + +#define QIB_7322_PSXmitWaitCount_1_OFFS 0x13238 +#define QIB_7322_PSXmitWaitCount_1_DEF 0x0000000000000000 + +#define QIB_7322_RcvEgrArray_OFFS 0x14000 +#define QIB_7322_RcvEgrArray_DEF 0x0000000000000000 +#define QIB_7322_RcvEgrArray_RT_BufSize_LSB 0x25 +#define QIB_7322_RcvEgrArray_RT_BufSize_MSB 0x27 +#define QIB_7322_RcvEgrArray_RT_BufSize_RMASK 0x7 +#define QIB_7322_RcvEgrArray_RT_Addr_LSB 0x0 +#define QIB_7322_RcvEgrArray_RT_Addr_MSB 0x24 +#define QIB_7322_RcvEgrArray_RT_Addr_RMASK 0x1FFFFFFFFF + +#define QIB_7322_RcvTIDArray0_OFFS 0x50000 +#define QIB_7322_RcvTIDArray0_DEF 0x0000000000000000 +#define QIB_7322_RcvTIDArray0_RT_BufSize_LSB 0x25 +#define QIB_7322_RcvTIDArray0_RT_BufSize_MSB 0x27 +#define QIB_7322_RcvTIDArray0_RT_BufSize_RMASK 0x7 +#define QIB_7322_RcvTIDArray0_RT_Addr_LSB 0x0 +#define QIB_7322_RcvTIDArray0_RT_Addr_MSB 0x24 +#define QIB_7322_RcvTIDArray0_RT_Addr_RMASK 0x1FFFFFFFFF + +#define QIB_7322_IBSD_DDS_MAP_TABLE_0_OFFS 0xD0000 +#define QIB_7322_IBSD_DDS_MAP_TABLE_0_DEF 0x0000000000000000 + +#define QIB_7322_RcvHdrTail0_OFFS 0x200000 +#define QIB_7322_RcvHdrTail0_DEF 0x0000000000000000 + +#define QIB_7322_RcvHdrHead0_OFFS 0x200008 +#define QIB_7322_RcvHdrHead0_DEF 0x0000000000000000 +#define QIB_7322_RcvHdrHead0_counter_LSB 0x20 +#define QIB_7322_RcvHdrHead0_counter_MSB 0x2F +#define QIB_7322_RcvHdrHead0_counter_RMASK 0xFFFF +#define QIB_7322_RcvHdrHead0_RcvHeadPointer_LSB 0x0 +#define QIB_7322_RcvHdrHead0_RcvHeadPointer_MSB 0x1F +#define QIB_7322_RcvHdrHead0_RcvHeadPointer_RMASK 0xFFFFFFFF + +#define QIB_7322_RcvEgrIndexTail0_OFFS 0x200010 +#define QIB_7322_RcvEgrIndexTail0_DEF 0x0000000000000000 + +#define QIB_7322_RcvEgrIndexHead0_OFFS 0x200018 +#define QIB_7322_RcvEgrIndexHead0_DEF 0x0000000000000000 + +#define QIB_7322_RcvTIDFlowTable0_OFFS 0x201000 +#define QIB_7322_RcvTIDFlowTable0_DEF 0x0000000000000000 +#define QIB_7322_RcvTIDFlowTable0_GenMismatch_LSB 0x1C +#define QIB_7322_RcvTIDFlowTable0_GenMismatch_MSB 0x1C +#define QIB_7322_RcvTIDFlowTable0_GenMismatch_RMASK 0x1 +#define QIB_7322_RcvTIDFlowTable0_SeqMismatch_LSB 0x1B +#define QIB_7322_RcvTIDFlowTable0_SeqMismatch_MSB 0x1B +#define QIB_7322_RcvTIDFlowTable0_SeqMismatch_RMASK 0x1 +#define QIB_7322_RcvTIDFlowTable0_KeepOnGenErr_LSB 0x16 +#define QIB_7322_RcvTIDFlowTable0_KeepOnGenErr_MSB 0x16 +#define QIB_7322_RcvTIDFlowTable0_KeepOnGenErr_RMASK 0x1 +#define QIB_7322_RcvTIDFlowTable0_KeepAfterSeqErr_LSB 0x15 +#define QIB_7322_RcvTIDFlowTable0_KeepAfterSeqErr_MSB 0x15 +#define QIB_7322_RcvTIDFlowTable0_KeepAfterSeqErr_RMASK 0x1 +#define QIB_7322_RcvTIDFlowTable0_HdrSuppEnabled_LSB 0x14 +#define QIB_7322_RcvTIDFlowTable0_HdrSuppEnabled_MSB 0x14 +#define QIB_7322_RcvTIDFlowTable0_HdrSuppEnabled_RMASK 0x1 +#define QIB_7322_RcvTIDFlowTable0_FlowValid_LSB 0x13 +#define QIB_7322_RcvTIDFlowTable0_FlowValid_MSB 0x13 +#define QIB_7322_RcvTIDFlowTable0_FlowValid_RMASK 0x1 +#define QIB_7322_RcvTIDFlowTable0_GenVal_LSB 0xB +#define QIB_7322_RcvTIDFlowTable0_GenVal_MSB 0x12 +#define QIB_7322_RcvTIDFlowTable0_GenVal_RMASK 0xFF +#define QIB_7322_RcvTIDFlowTable0_SeqNum_LSB 0x0 +#define QIB_7322_RcvTIDFlowTable0_SeqNum_MSB 0xA +#define QIB_7322_RcvTIDFlowTable0_SeqNum_RMASK 0x7FF diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h new file mode 100644 index 00000000..145da404 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_common.h @@ -0,0 +1,770 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _QIB_COMMON_H +#define _QIB_COMMON_H + +/* + * This file contains defines, structures, etc. that are used + * to communicate between kernel and user code. + */ + +/* This is the IEEE-assigned OUI for QLogic Inc. QLogic_IB */ +#define QIB_SRC_OUI_1 0x00 +#define QIB_SRC_OUI_2 0x11 +#define QIB_SRC_OUI_3 0x75 + +/* version of protocol header (known to chip also). In the long run, + * we should be able to generate and accept a range of version numbers; + * for now we only accept one, and it's compiled in. + */ +#define IPS_PROTO_VERSION 2 + +/* + * These are compile time constants that you may want to enable or disable + * if you are trying to debug problems with code or performance. + * QIB_VERBOSE_TRACING define as 1 if you want additional tracing in + * fastpath code + * QIB_TRACE_REGWRITES define as 1 if you want register writes to be + * traced in faspath code + * _QIB_TRACING define as 0 if you want to remove all tracing in a + * compilation unit + */ + +/* + * The value in the BTH QP field that QLogic_IB uses to differentiate + * an qlogic_ib protocol IB packet vs standard IB transport + * This it needs to be even (0x656b78), because the LSB is sometimes + * used for the MSB of context. The change may cause a problem + * interoperating with older software. + */ +#define QIB_KD_QP 0x656b78 + +/* + * These are the status bits readable (in ascii form, 64bit value) + * from the "status" sysfs file. For binary compatibility, values + * must remain as is; removed states can be reused for different + * purposes. + */ +#define QIB_STATUS_INITTED 0x1 /* basic initialization done */ +/* Chip has been found and initted */ +#define QIB_STATUS_CHIP_PRESENT 0x20 +/* IB link is at ACTIVE, usable for data traffic */ +#define QIB_STATUS_IB_READY 0x40 +/* link is configured, LID, MTU, etc. have been set */ +#define QIB_STATUS_IB_CONF 0x80 +/* A Fatal hardware error has occurred. */ +#define QIB_STATUS_HWERROR 0x200 + +/* + * The list of usermode accessible registers. Also see Reg_* later in file. + */ +enum qib_ureg { + /* (RO) DMA RcvHdr to be used next. */ + ur_rcvhdrtail = 0, + /* (RW) RcvHdr entry to be processed next by host. */ + ur_rcvhdrhead = 1, + /* (RO) Index of next Eager index to use. */ + ur_rcvegrindextail = 2, + /* (RW) Eager TID to be processed next */ + ur_rcvegrindexhead = 3, + /* For internal use only; max register number. */ + _QIB_UregMax +}; + +/* bit values for spi_runtime_flags */ +#define QIB_RUNTIME_PCIE 0x0002 +#define QIB_RUNTIME_FORCE_WC_ORDER 0x0004 +#define QIB_RUNTIME_RCVHDR_COPY 0x0008 +#define QIB_RUNTIME_MASTER 0x0010 +#define QIB_RUNTIME_RCHK 0x0020 +#define QIB_RUNTIME_NODMA_RTAIL 0x0080 +#define QIB_RUNTIME_SPECIAL_TRIGGER 0x0100 +#define QIB_RUNTIME_SDMA 0x0200 +#define QIB_RUNTIME_FORCE_PIOAVAIL 0x0400 +#define QIB_RUNTIME_PIO_REGSWAPPED 0x0800 +#define QIB_RUNTIME_CTXT_MSB_IN_QP 0x1000 +#define QIB_RUNTIME_CTXT_REDIRECT 0x2000 +#define QIB_RUNTIME_HDRSUPP 0x4000 + +/* + * This structure is returned by qib_userinit() immediately after + * open to get implementation-specific info, and info specific to this + * instance. + * + * This struct must have explict pad fields where type sizes + * may result in different alignments between 32 and 64 bit + * programs, since the 64 bit * bit kernel requires the user code + * to have matching offsets + */ +struct qib_base_info { + /* version of hardware, for feature checking. */ + __u32 spi_hw_version; + /* version of software, for feature checking. */ + __u32 spi_sw_version; + /* QLogic_IB context assigned, goes into sent packets */ + __u16 spi_ctxt; + __u16 spi_subctxt; + /* + * IB MTU, packets IB data must be less than this. + * The MTU is in bytes, and will be a multiple of 4 bytes. + */ + __u32 spi_mtu; + /* + * Size of a PIO buffer. Any given packet's total size must be less + * than this (in words). Included is the starting control word, so + * if 513 is returned, then total pkt size is 512 words or less. + */ + __u32 spi_piosize; + /* size of the TID cache in qlogic_ib, in entries */ + __u32 spi_tidcnt; + /* size of the TID Eager list in qlogic_ib, in entries */ + __u32 spi_tidegrcnt; + /* size of a single receive header queue entry in words. */ + __u32 spi_rcvhdrent_size; + /* + * Count of receive header queue entries allocated. + * This may be less than the spu_rcvhdrcnt passed in!. + */ + __u32 spi_rcvhdr_cnt; + + /* per-chip and other runtime features bitmap (QIB_RUNTIME_*) */ + __u32 spi_runtime_flags; + + /* address where hardware receive header queue is mapped */ + __u64 spi_rcvhdr_base; + + /* user program. */ + + /* base address of eager TID receive buffers used by hardware. */ + __u64 spi_rcv_egrbufs; + + /* Allocated by initialization code, not by protocol. */ + + /* + * Size of each TID buffer in host memory, starting at + * spi_rcv_egrbufs. The buffers are virtually contiguous. + */ + __u32 spi_rcv_egrbufsize; + /* + * The special QP (queue pair) value that identifies an qlogic_ib + * protocol packet from standard IB packets. More, probably much + * more, to be added. + */ + __u32 spi_qpair; + + /* + * User register base for init code, not to be used directly by + * protocol or applications. Always points to chip registers, + * for normal or shared context. + */ + __u64 spi_uregbase; + /* + * Maximum buffer size in bytes that can be used in a single TID + * entry (assuming the buffer is aligned to this boundary). This is + * the minimum of what the hardware and software support Guaranteed + * to be a power of 2. + */ + __u32 spi_tid_maxsize; + /* + * alignment of each pio send buffer (byte count + * to add to spi_piobufbase to get to second buffer) + */ + __u32 spi_pioalign; + /* + * The index of the first pio buffer available to this process; + * needed to do lookup in spi_pioavailaddr; not added to + * spi_piobufbase. + */ + __u32 spi_pioindex; + /* number of buffers mapped for this process */ + __u32 spi_piocnt; + + /* + * Base address of writeonly pio buffers for this process. + * Each buffer has spi_piosize words, and is aligned on spi_pioalign + * boundaries. spi_piocnt buffers are mapped from this address + */ + __u64 spi_piobufbase; + + /* + * Base address of readonly memory copy of the pioavail registers. + * There are 2 bits for each buffer. + */ + __u64 spi_pioavailaddr; + + /* + * Address where driver updates a copy of the interface and driver + * status (QIB_STATUS_*) as a 64 bit value. It's followed by a + * link status qword (formerly combined with driver status), then a + * string indicating hardware error, if there was one. + */ + __u64 spi_status; + + /* number of chip ctxts available to user processes */ + __u32 spi_nctxts; + __u16 spi_unit; /* unit number of chip we are using */ + __u16 spi_port; /* IB port number we are using */ + /* num bufs in each contiguous set */ + __u32 spi_rcv_egrperchunk; + /* size in bytes of each contiguous set */ + __u32 spi_rcv_egrchunksize; + /* total size of mmap to cover full rcvegrbuffers */ + __u32 spi_rcv_egrbuftotlen; + __u32 spi_rhf_offset; /* dword offset in hdrqent for rcvhdr flags */ + /* address of readonly memory copy of the rcvhdrq tail register. */ + __u64 spi_rcvhdr_tailaddr; + + /* + * shared memory pages for subctxts if ctxt is shared; these cover + * all the processes in the group sharing a single context. + * all have enough space for the num_subcontexts value on this job. + */ + __u64 spi_subctxt_uregbase; + __u64 spi_subctxt_rcvegrbuf; + __u64 spi_subctxt_rcvhdr_base; + + /* shared memory page for send buffer disarm status */ + __u64 spi_sendbuf_status; +} __attribute__ ((aligned(8))); + +/* + * This version number is given to the driver by the user code during + * initialization in the spu_userversion field of qib_user_info, so + * the driver can check for compatibility with user code. + * + * The major version changes when data structures + * change in an incompatible way. The driver must be the same or higher + * for initialization to succeed. In some cases, a higher version + * driver will not interoperate with older software, and initialization + * will return an error. + */ +#define QIB_USER_SWMAJOR 1 + +/* + * Minor version differences are always compatible + * a within a major version, however if user software is larger + * than driver software, some new features and/or structure fields + * may not be implemented; the user code must deal with this if it + * cares, or it must abort after initialization reports the difference. + */ +#define QIB_USER_SWMINOR 11 + +#define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR) + +#ifndef QIB_KERN_TYPE +#define QIB_KERN_TYPE 0 +#define QIB_IDSTR "QLogic kernel.org driver" +#endif + +/* + * Similarly, this is the kernel version going back to the user. It's + * slightly different, in that we want to tell if the driver was built as + * part of a QLogic release, or from the driver from openfabrics.org, + * kernel.org, or a standard distribution, for support reasons. + * The high bit is 0 for non-QLogic and 1 for QLogic-built/supplied. + * + * It's returned by the driver to the user code during initialization in the + * spi_sw_version field of qib_base_info, so the user code can in turn + * check for compatibility with the kernel. +*/ +#define QIB_KERN_SWVERSION ((QIB_KERN_TYPE << 31) | QIB_USER_SWVERSION) + +/* + * If the unit is specified via open, HCA choice is fixed. If port is + * specified, it's also fixed. Otherwise we try to spread contexts + * across ports and HCAs, using different algorithims. WITHIN is + * the old default, prior to this mechanism. + */ +#define QIB_PORT_ALG_ACROSS 0 /* round robin contexts across HCAs, then + * ports; this is the default */ +#define QIB_PORT_ALG_WITHIN 1 /* use all contexts on an HCA (round robin + * active ports within), then next HCA */ +#define QIB_PORT_ALG_COUNT 2 /* number of algorithm choices */ + +/* + * This structure is passed to qib_userinit() to tell the driver where + * user code buffers are, sizes, etc. The offsets and sizes of the + * fields must remain unchanged, for binary compatibility. It can + * be extended, if userversion is changed so user code can tell, if needed + */ +struct qib_user_info { + /* + * version of user software, to detect compatibility issues. + * Should be set to QIB_USER_SWVERSION. + */ + __u32 spu_userversion; + + __u32 _spu_unused2; + + /* size of struct base_info to write to */ + __u32 spu_base_info_size; + + __u32 spu_port_alg; /* which QIB_PORT_ALG_*; unused user minor < 11 */ + + /* + * If two or more processes wish to share a context, each process + * must set the spu_subctxt_cnt and spu_subctxt_id to the same + * values. The only restriction on the spu_subctxt_id is that + * it be unique for a given node. + */ + __u16 spu_subctxt_cnt; + __u16 spu_subctxt_id; + + __u32 spu_port; /* IB port requested by user if > 0 */ + + /* + * address of struct base_info to write to + */ + __u64 spu_base_info; + +} __attribute__ ((aligned(8))); + +/* User commands. */ + +/* 16 available, was: old set up userspace (for old user code) */ +#define QIB_CMD_CTXT_INFO 17 /* find out what resources we got */ +#define QIB_CMD_RECV_CTRL 18 /* control receipt of packets */ +#define QIB_CMD_TID_UPDATE 19 /* update expected TID entries */ +#define QIB_CMD_TID_FREE 20 /* free expected TID entries */ +#define QIB_CMD_SET_PART_KEY 21 /* add partition key */ +/* 22 available, was: return info on slave processes (for old user code) */ +#define QIB_CMD_ASSIGN_CTXT 23 /* allocate HCA and ctxt */ +#define QIB_CMD_USER_INIT 24 /* set up userspace */ +#define QIB_CMD_UNUSED_1 25 +#define QIB_CMD_UNUSED_2 26 +#define QIB_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ +#define QIB_CMD_POLL_TYPE 28 /* set the kind of polling we want */ +#define QIB_CMD_ARMLAUNCH_CTRL 29 /* armlaunch detection control */ +/* 30 is unused */ +#define QIB_CMD_SDMA_INFLIGHT 31 /* sdma inflight counter request */ +#define QIB_CMD_SDMA_COMPLETE 32 /* sdma completion counter request */ +/* 33 available, was a testing feature */ +#define QIB_CMD_DISARM_BUFS 34 /* disarm send buffers w/ errors */ +#define QIB_CMD_ACK_EVENT 35 /* ack & clear bits */ +#define QIB_CMD_CPUS_LIST 36 /* list of cpus allocated, for pinned + * processes: qib_cpus_list */ + +/* + * QIB_CMD_ACK_EVENT obsoletes QIB_CMD_DISARM_BUFS, but we keep it for + * compatibility with libraries from previous release. The ACK_EVENT + * will take appropriate driver action (if any, just DISARM for now), + * then clear the bits passed in as part of the mask. These bits are + * in the first 64bit word at spi_sendbuf_status, and are passed to + * the driver in the event_mask union as well. + */ +#define _QIB_EVENT_DISARM_BUFS_BIT 0 +#define _QIB_EVENT_LINKDOWN_BIT 1 +#define _QIB_EVENT_LID_CHANGE_BIT 2 +#define _QIB_EVENT_LMC_CHANGE_BIT 3 +#define _QIB_EVENT_SL2VL_CHANGE_BIT 4 +#define _QIB_MAX_EVENT_BIT _QIB_EVENT_SL2VL_CHANGE_BIT + +#define QIB_EVENT_DISARM_BUFS_BIT (1UL << _QIB_EVENT_DISARM_BUFS_BIT) +#define QIB_EVENT_LINKDOWN_BIT (1UL << _QIB_EVENT_LINKDOWN_BIT) +#define QIB_EVENT_LID_CHANGE_BIT (1UL << _QIB_EVENT_LID_CHANGE_BIT) +#define QIB_EVENT_LMC_CHANGE_BIT (1UL << _QIB_EVENT_LMC_CHANGE_BIT) +#define QIB_EVENT_SL2VL_CHANGE_BIT (1UL << _QIB_EVENT_SL2VL_CHANGE_BIT) + + +/* + * Poll types + */ +#define QIB_POLL_TYPE_ANYRCV 0x0 +#define QIB_POLL_TYPE_URGENT 0x1 + +struct qib_ctxt_info { + __u16 num_active; /* number of active units */ + __u16 unit; /* unit (chip) assigned to caller */ + __u16 port; /* IB port assigned to caller (1-based) */ + __u16 ctxt; /* ctxt on unit assigned to caller */ + __u16 subctxt; /* subctxt on unit assigned to caller */ + __u16 num_ctxts; /* number of ctxts available on unit */ + __u16 num_subctxts; /* number of subctxts opened on ctxt */ + __u16 rec_cpu; /* cpu # for affinity (ffff if none) */ +}; + +struct qib_tid_info { + __u32 tidcnt; + /* make structure same size in 32 and 64 bit */ + __u32 tid__unused; + /* virtual address of first page in transfer */ + __u64 tidvaddr; + /* pointer (same size 32/64 bit) to __u16 tid array */ + __u64 tidlist; + + /* + * pointer (same size 32/64 bit) to bitmap of TIDs used + * for this call; checked for being large enough at open + */ + __u64 tidmap; +}; + +struct qib_cmd { + __u32 type; /* command type */ + union { + struct qib_tid_info tid_info; + struct qib_user_info user_info; + + /* + * address in userspace where we should put the sdma + * inflight counter + */ + __u64 sdma_inflight; + /* + * address in userspace where we should put the sdma + * completion counter + */ + __u64 sdma_complete; + /* address in userspace of struct qib_ctxt_info to + write result to */ + __u64 ctxt_info; + /* enable/disable receipt of packets */ + __u32 recv_ctrl; + /* enable/disable armlaunch errors (non-zero to enable) */ + __u32 armlaunch_ctrl; + /* partition key to set */ + __u16 part_key; + /* user address of __u32 bitmask of active slaves */ + __u64 slave_mask_addr; + /* type of polling we want */ + __u16 poll_type; + /* back pressure enable bit for one particular context */ + __u8 ctxt_bp; + /* qib_user_event_ack(), IPATH_EVENT_* bits */ + __u64 event_mask; + } cmd; +}; + +struct qib_iovec { + /* Pointer to data, but same size 32 and 64 bit */ + __u64 iov_base; + + /* + * Length of data; don't need 64 bits, but want + * qib_sendpkt to remain same size as before 32 bit changes, so... + */ + __u64 iov_len; +}; + +/* + * Describes a single packet for send. Each packet can have one or more + * buffers, but the total length (exclusive of IB headers) must be less + * than the MTU, and if using the PIO method, entire packet length, + * including IB headers, must be less than the qib_piosize value (words). + * Use of this necessitates including sys/uio.h + */ +struct __qib_sendpkt { + __u32 sps_flags; /* flags for packet (TBD) */ + __u32 sps_cnt; /* number of entries to use in sps_iov */ + /* array of iov's describing packet. TEMPORARY */ + struct qib_iovec sps_iov[4]; +}; + +/* + * Diagnostics can send a packet by "writing" the following + * structs to the diag data special file. + * This allows a custom + * pbc (+ static rate) qword, so that special modes and deliberate + * changes to CRCs can be used. The elements were also re-ordered + * for better alignment and to avoid padding issues. + */ +#define _DIAG_XPKT_VERS 3 +struct qib_diag_xpkt { + __u16 version; + __u16 unit; + __u16 port; + __u16 len; + __u64 data; + __u64 pbc_wd; +}; + +/* + * Data layout in I2C flash (for GUID, etc.) + * All fields are little-endian binary unless otherwise stated + */ +#define QIB_FLASH_VERSION 2 +struct qib_flash { + /* flash layout version (QIB_FLASH_VERSION) */ + __u8 if_fversion; + /* checksum protecting if_length bytes */ + __u8 if_csum; + /* + * valid length (in use, protected by if_csum), including + * if_fversion and if_csum themselves) + */ + __u8 if_length; + /* the GUID, in network order */ + __u8 if_guid[8]; + /* number of GUIDs to use, starting from if_guid */ + __u8 if_numguid; + /* the (last 10 characters of) board serial number, in ASCII */ + char if_serial[12]; + /* board mfg date (YYYYMMDD ASCII) */ + char if_mfgdate[8]; + /* last board rework/test date (YYYYMMDD ASCII) */ + char if_testdate[8]; + /* logging of error counts, TBD */ + __u8 if_errcntp[4]; + /* powered on hours, updated at driver unload */ + __u8 if_powerhour[2]; + /* ASCII free-form comment field */ + char if_comment[32]; + /* Backwards compatible prefix for longer QLogic Serial Numbers */ + char if_sprefix[4]; + /* 82 bytes used, min flash size is 128 bytes */ + __u8 if_future[46]; +}; + +/* + * These are the counters implemented in the chip, and are listed in order. + * The InterCaps naming is taken straight from the chip spec. + */ +struct qlogic_ib_counters { + __u64 LBIntCnt; + __u64 LBFlowStallCnt; + __u64 TxSDmaDescCnt; /* was Reserved1 */ + __u64 TxUnsupVLErrCnt; + __u64 TxDataPktCnt; + __u64 TxFlowPktCnt; + __u64 TxDwordCnt; + __u64 TxLenErrCnt; + __u64 TxMaxMinLenErrCnt; + __u64 TxUnderrunCnt; + __u64 TxFlowStallCnt; + __u64 TxDroppedPktCnt; + __u64 RxDroppedPktCnt; + __u64 RxDataPktCnt; + __u64 RxFlowPktCnt; + __u64 RxDwordCnt; + __u64 RxLenErrCnt; + __u64 RxMaxMinLenErrCnt; + __u64 RxICRCErrCnt; + __u64 RxVCRCErrCnt; + __u64 RxFlowCtrlErrCnt; + __u64 RxBadFormatCnt; + __u64 RxLinkProblemCnt; + __u64 RxEBPCnt; + __u64 RxLPCRCErrCnt; + __u64 RxBufOvflCnt; + __u64 RxTIDFullErrCnt; + __u64 RxTIDValidErrCnt; + __u64 RxPKeyMismatchCnt; + __u64 RxP0HdrEgrOvflCnt; + __u64 RxP1HdrEgrOvflCnt; + __u64 RxP2HdrEgrOvflCnt; + __u64 RxP3HdrEgrOvflCnt; + __u64 RxP4HdrEgrOvflCnt; + __u64 RxP5HdrEgrOvflCnt; + __u64 RxP6HdrEgrOvflCnt; + __u64 RxP7HdrEgrOvflCnt; + __u64 RxP8HdrEgrOvflCnt; + __u64 RxP9HdrEgrOvflCnt; + __u64 RxP10HdrEgrOvflCnt; + __u64 RxP11HdrEgrOvflCnt; + __u64 RxP12HdrEgrOvflCnt; + __u64 RxP13HdrEgrOvflCnt; + __u64 RxP14HdrEgrOvflCnt; + __u64 RxP15HdrEgrOvflCnt; + __u64 RxP16HdrEgrOvflCnt; + __u64 IBStatusChangeCnt; + __u64 IBLinkErrRecoveryCnt; + __u64 IBLinkDownedCnt; + __u64 IBSymbolErrCnt; + __u64 RxVL15DroppedPktCnt; + __u64 RxOtherLocalPhyErrCnt; + __u64 PcieRetryBufDiagQwordCnt; + __u64 ExcessBufferOvflCnt; + __u64 LocalLinkIntegrityErrCnt; + __u64 RxVlErrCnt; + __u64 RxDlidFltrCnt; +}; + +/* + * The next set of defines are for packet headers, and chip register + * and memory bits that are visible to and/or used by user-mode software. + */ + +/* RcvHdrFlags bits */ +#define QLOGIC_IB_RHF_LENGTH_MASK 0x7FF +#define QLOGIC_IB_RHF_LENGTH_SHIFT 0 +#define QLOGIC_IB_RHF_RCVTYPE_MASK 0x7 +#define QLOGIC_IB_RHF_RCVTYPE_SHIFT 11 +#define QLOGIC_IB_RHF_EGRINDEX_MASK 0xFFF +#define QLOGIC_IB_RHF_EGRINDEX_SHIFT 16 +#define QLOGIC_IB_RHF_SEQ_MASK 0xF +#define QLOGIC_IB_RHF_SEQ_SHIFT 0 +#define QLOGIC_IB_RHF_HDRQ_OFFSET_MASK 0x7FF +#define QLOGIC_IB_RHF_HDRQ_OFFSET_SHIFT 4 +#define QLOGIC_IB_RHF_H_ICRCERR 0x80000000 +#define QLOGIC_IB_RHF_H_VCRCERR 0x40000000 +#define QLOGIC_IB_RHF_H_PARITYERR 0x20000000 +#define QLOGIC_IB_RHF_H_LENERR 0x10000000 +#define QLOGIC_IB_RHF_H_MTUERR 0x08000000 +#define QLOGIC_IB_RHF_H_IHDRERR 0x04000000 +#define QLOGIC_IB_RHF_H_TIDERR 0x02000000 +#define QLOGIC_IB_RHF_H_MKERR 0x01000000 +#define QLOGIC_IB_RHF_H_IBERR 0x00800000 +#define QLOGIC_IB_RHF_H_ERR_MASK 0xFF800000 +#define QLOGIC_IB_RHF_L_USE_EGR 0x80000000 +#define QLOGIC_IB_RHF_L_SWA 0x00008000 +#define QLOGIC_IB_RHF_L_SWB 0x00004000 + +/* qlogic_ib header fields */ +#define QLOGIC_IB_I_VERS_MASK 0xF +#define QLOGIC_IB_I_VERS_SHIFT 28 +#define QLOGIC_IB_I_CTXT_MASK 0xF +#define QLOGIC_IB_I_CTXT_SHIFT 24 +#define QLOGIC_IB_I_TID_MASK 0x7FF +#define QLOGIC_IB_I_TID_SHIFT 13 +#define QLOGIC_IB_I_OFFSET_MASK 0x1FFF +#define QLOGIC_IB_I_OFFSET_SHIFT 0 + +/* K_PktFlags bits */ +#define QLOGIC_IB_KPF_INTR 0x1 +#define QLOGIC_IB_KPF_SUBCTXT_MASK 0x3 +#define QLOGIC_IB_KPF_SUBCTXT_SHIFT 1 + +#define QLOGIC_IB_MAX_SUBCTXT 4 + +/* SendPIO per-buffer control */ +#define QLOGIC_IB_SP_TEST 0x40 +#define QLOGIC_IB_SP_TESTEBP 0x20 +#define QLOGIC_IB_SP_TRIGGER_SHIFT 15 + +/* SendPIOAvail bits */ +#define QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT 1 +#define QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT 0 + +/* qlogic_ib header format */ +struct qib_header { + /* + * Version - 4 bits, Context - 4 bits, TID - 10 bits and Offset - + * 14 bits before ECO change ~28 Dec 03. After that, Vers 4, + * Context 4, TID 11, offset 13. + */ + __le32 ver_ctxt_tid_offset; + __le16 chksum; + __le16 pkt_flags; +}; + +/* + * qlogic_ib user message header format. + * This structure contains the first 4 fields common to all protocols + * that employ qlogic_ib. + */ +struct qib_message_header { + __be16 lrh[4]; + __be32 bth[3]; + /* fields below this point are in host byte order */ + struct qib_header iph; + __u8 sub_opcode; +}; + +/* IB - LRH header consts */ +#define QIB_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */ +#define QIB_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */ + +/* misc. */ +#define SIZE_OF_CRC 1 + +#define QIB_DEFAULT_P_KEY 0xFFFF +#define QIB_PERMISSIVE_LID 0xFFFF +#define QIB_AETH_CREDIT_SHIFT 24 +#define QIB_AETH_CREDIT_MASK 0x1F +#define QIB_AETH_CREDIT_INVAL 0x1F +#define QIB_PSN_MASK 0xFFFFFF +#define QIB_MSN_MASK 0xFFFFFF +#define QIB_QPN_MASK 0xFFFFFF +#define QIB_MULTICAST_LID_BASE 0xC000 +#define QIB_EAGER_TID_ID QLOGIC_IB_I_TID_MASK +#define QIB_MULTICAST_QPN 0xFFFFFF + +/* Receive Header Queue: receive type (from qlogic_ib) */ +#define RCVHQ_RCV_TYPE_EXPECTED 0 +#define RCVHQ_RCV_TYPE_EAGER 1 +#define RCVHQ_RCV_TYPE_NON_KD 2 +#define RCVHQ_RCV_TYPE_ERROR 3 + +#define QIB_HEADER_QUEUE_WORDS 9 + +/* functions for extracting fields from rcvhdrq entries for the driver. + */ +static inline __u32 qib_hdrget_err_flags(const __le32 *rbuf) +{ + return __le32_to_cpu(rbuf[1]) & QLOGIC_IB_RHF_H_ERR_MASK; +} + +static inline __u32 qib_hdrget_rcv_type(const __le32 *rbuf) +{ + return (__le32_to_cpu(rbuf[0]) >> QLOGIC_IB_RHF_RCVTYPE_SHIFT) & + QLOGIC_IB_RHF_RCVTYPE_MASK; +} + +static inline __u32 qib_hdrget_length_in_bytes(const __le32 *rbuf) +{ + return ((__le32_to_cpu(rbuf[0]) >> QLOGIC_IB_RHF_LENGTH_SHIFT) & + QLOGIC_IB_RHF_LENGTH_MASK) << 2; +} + +static inline __u32 qib_hdrget_index(const __le32 *rbuf) +{ + return (__le32_to_cpu(rbuf[0]) >> QLOGIC_IB_RHF_EGRINDEX_SHIFT) & + QLOGIC_IB_RHF_EGRINDEX_MASK; +} + +static inline __u32 qib_hdrget_seq(const __le32 *rbuf) +{ + return (__le32_to_cpu(rbuf[1]) >> QLOGIC_IB_RHF_SEQ_SHIFT) & + QLOGIC_IB_RHF_SEQ_MASK; +} + +static inline __u32 qib_hdrget_offset(const __le32 *rbuf) +{ + return (__le32_to_cpu(rbuf[1]) >> QLOGIC_IB_RHF_HDRQ_OFFSET_SHIFT) & + QLOGIC_IB_RHF_HDRQ_OFFSET_MASK; +} + +static inline __u32 qib_hdrget_use_egr_buf(const __le32 *rbuf) +{ + return __le32_to_cpu(rbuf[0]) & QLOGIC_IB_RHF_L_USE_EGR; +} + +static inline __u32 qib_hdrget_qib_ver(__le32 hdrword) +{ + return (__le32_to_cpu(hdrword) >> QLOGIC_IB_I_VERS_SHIFT) & + QLOGIC_IB_I_VERS_MASK; +} + +#endif /* _QIB_COMMON_H */ diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c new file mode 100644 index 00000000..5246aa48 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_cq.c @@ -0,0 +1,485 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "qib_verbs.h" + +/** + * qib_cq_enter - add a new entry to the completion queue + * @cq: completion queue + * @entry: work completion entry to add + * @sig: true if @entry is a solicitated entry + * + * This may be called with qp->s_lock held. + */ +void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited) +{ + struct qib_cq_wc *wc; + unsigned long flags; + u32 head; + u32 next; + + spin_lock_irqsave(&cq->lock, flags); + + /* + * Note that the head pointer might be writable by user processes. + * Take care to verify it is a sane value. + */ + wc = cq->queue; + head = wc->head; + if (head >= (unsigned) cq->ibcq.cqe) { + head = cq->ibcq.cqe; + next = 0; + } else + next = head + 1; + if (unlikely(next == wc->tail)) { + spin_unlock_irqrestore(&cq->lock, flags); + if (cq->ibcq.event_handler) { + struct ib_event ev; + + ev.device = cq->ibcq.device; + ev.element.cq = &cq->ibcq; + ev.event = IB_EVENT_CQ_ERR; + cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); + } + return; + } + if (cq->ip) { + wc->uqueue[head].wr_id = entry->wr_id; + wc->uqueue[head].status = entry->status; + wc->uqueue[head].opcode = entry->opcode; + wc->uqueue[head].vendor_err = entry->vendor_err; + wc->uqueue[head].byte_len = entry->byte_len; + wc->uqueue[head].ex.imm_data = + (__u32 __force)entry->ex.imm_data; + wc->uqueue[head].qp_num = entry->qp->qp_num; + wc->uqueue[head].src_qp = entry->src_qp; + wc->uqueue[head].wc_flags = entry->wc_flags; + wc->uqueue[head].pkey_index = entry->pkey_index; + wc->uqueue[head].slid = entry->slid; + wc->uqueue[head].sl = entry->sl; + wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; + wc->uqueue[head].port_num = entry->port_num; + /* Make sure entry is written before the head index. */ + smp_wmb(); + } else + wc->kqueue[head] = *entry; + wc->head = next; + + if (cq->notify == IB_CQ_NEXT_COMP || + (cq->notify == IB_CQ_SOLICITED && + (solicited || entry->status != IB_WC_SUCCESS))) { + cq->notify = IB_CQ_NONE; + cq->triggered++; + /* + * This will cause send_complete() to be called in + * another thread. + */ + queue_work(qib_cq_wq, &cq->comptask); + } + + spin_unlock_irqrestore(&cq->lock, flags); +} + +/** + * qib_poll_cq - poll for work completion entries + * @ibcq: the completion queue to poll + * @num_entries: the maximum number of entries to return + * @entry: pointer to array where work completions are placed + * + * Returns the number of completion entries polled. + * + * This may be called from interrupt context. Also called by ib_poll_cq() + * in the generic verbs code. + */ +int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) +{ + struct qib_cq *cq = to_icq(ibcq); + struct qib_cq_wc *wc; + unsigned long flags; + int npolled; + u32 tail; + + /* The kernel can only poll a kernel completion queue */ + if (cq->ip) { + npolled = -EINVAL; + goto bail; + } + + spin_lock_irqsave(&cq->lock, flags); + + wc = cq->queue; + tail = wc->tail; + if (tail > (u32) cq->ibcq.cqe) + tail = (u32) cq->ibcq.cqe; + for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { + if (tail == wc->head) + break; + /* The kernel doesn't need a RMB since it has the lock. */ + *entry = wc->kqueue[tail]; + if (tail >= cq->ibcq.cqe) + tail = 0; + else + tail++; + } + wc->tail = tail; + + spin_unlock_irqrestore(&cq->lock, flags); + +bail: + return npolled; +} + +static void send_complete(struct work_struct *work) +{ + struct qib_cq *cq = container_of(work, struct qib_cq, comptask); + + /* + * The completion handler will most likely rearm the notification + * and poll for all pending entries. If a new completion entry + * is added while we are in this routine, queue_work() + * won't call us again until we return so we check triggered to + * see if we need to call the handler again. + */ + for (;;) { + u8 triggered = cq->triggered; + + /* + * IPoIB connected mode assumes the callback is from a + * soft IRQ. We simulate this by blocking "bottom halves". + * See the implementation for ipoib_cm_handle_tx_wc(), + * netif_tx_lock_bh() and netif_tx_lock(). + */ + local_bh_disable(); + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); + local_bh_enable(); + + if (cq->triggered == triggered) + return; + } +} + +/** + * qib_create_cq - create a completion queue + * @ibdev: the device this completion queue is attached to + * @entries: the minimum size of the completion queue + * @context: unused by the QLogic_IB driver + * @udata: user data for libibverbs.so + * + * Returns a pointer to the completion queue or negative errno values + * for failure. + * + * Called by ib_create_cq() in the generic verbs code. + */ +struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, + int comp_vector, struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct qib_ibdev *dev = to_idev(ibdev); + struct qib_cq *cq; + struct qib_cq_wc *wc; + struct ib_cq *ret; + u32 sz; + + if (entries < 1 || entries > ib_qib_max_cqes) { + ret = ERR_PTR(-EINVAL); + goto done; + } + + /* Allocate the completion queue structure. */ + cq = kmalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) { + ret = ERR_PTR(-ENOMEM); + goto done; + } + + /* + * Allocate the completion queue entries and head/tail pointers. + * This is allocated separately so that it can be resized and + * also mapped into user space. + * We need to use vmalloc() in order to support mmap and large + * numbers of entries. + */ + sz = sizeof(*wc); + if (udata && udata->outlen >= sizeof(__u64)) + sz += sizeof(struct ib_uverbs_wc) * (entries + 1); + else + sz += sizeof(struct ib_wc) * (entries + 1); + wc = vmalloc_user(sz); + if (!wc) { + ret = ERR_PTR(-ENOMEM); + goto bail_cq; + } + + /* + * Return the address of the WC as the offset to mmap. + * See qib_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + int err; + + cq->ip = qib_create_mmap_info(dev, sz, context, wc); + if (!cq->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wc; + } + + err = ib_copy_to_udata(udata, &cq->ip->offset, + sizeof(cq->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } else + cq->ip = NULL; + + spin_lock(&dev->n_cqs_lock); + if (dev->n_cqs_allocated == ib_qib_max_cqs) { + spin_unlock(&dev->n_cqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_cqs_allocated++; + spin_unlock(&dev->n_cqs_lock); + + if (cq->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + /* + * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. + * The number of entries should be >= the number requested or return + * an error. + */ + cq->ibcq.cqe = entries; + cq->notify = IB_CQ_NONE; + cq->triggered = 0; + spin_lock_init(&cq->lock); + INIT_WORK(&cq->comptask, send_complete); + wc->head = 0; + wc->tail = 0; + cq->queue = wc; + + ret = &cq->ibcq; + + goto done; + +bail_ip: + kfree(cq->ip); +bail_wc: + vfree(wc); +bail_cq: + kfree(cq); +done: + return ret; +} + +/** + * qib_destroy_cq - destroy a completion queue + * @ibcq: the completion queue to destroy. + * + * Returns 0 for success. + * + * Called by ib_destroy_cq() in the generic verbs code. + */ +int qib_destroy_cq(struct ib_cq *ibcq) +{ + struct qib_ibdev *dev = to_idev(ibcq->device); + struct qib_cq *cq = to_icq(ibcq); + + flush_work(&cq->comptask); + spin_lock(&dev->n_cqs_lock); + dev->n_cqs_allocated--; + spin_unlock(&dev->n_cqs_lock); + if (cq->ip) + kref_put(&cq->ip->ref, qib_release_mmap_info); + else + vfree(cq->queue); + kfree(cq); + + return 0; +} + +/** + * qib_req_notify_cq - change the notification type for a completion queue + * @ibcq: the completion queue + * @notify_flags: the type of notification to request + * + * Returns 0 for success. + * + * This may be called from interrupt context. Also called by + * ib_req_notify_cq() in the generic verbs code. + */ +int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) +{ + struct qib_cq *cq = to_icq(ibcq); + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&cq->lock, flags); + /* + * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow + * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2). + */ + if (cq->notify != IB_CQ_NEXT_COMP) + cq->notify = notify_flags & IB_CQ_SOLICITED_MASK; + + if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && + cq->queue->head != cq->queue->tail) + ret = 1; + + spin_unlock_irqrestore(&cq->lock, flags); + + return ret; +} + +/** + * qib_resize_cq - change the size of the CQ + * @ibcq: the completion queue + * + * Returns 0 for success. + */ +int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) +{ + struct qib_cq *cq = to_icq(ibcq); + struct qib_cq_wc *old_wc; + struct qib_cq_wc *wc; + u32 head, tail, n; + int ret; + u32 sz; + + if (cqe < 1 || cqe > ib_qib_max_cqes) { + ret = -EINVAL; + goto bail; + } + + /* + * Need to use vmalloc() if we want to support large #s of entries. + */ + sz = sizeof(*wc); + if (udata && udata->outlen >= sizeof(__u64)) + sz += sizeof(struct ib_uverbs_wc) * (cqe + 1); + else + sz += sizeof(struct ib_wc) * (cqe + 1); + wc = vmalloc_user(sz); + if (!wc) { + ret = -ENOMEM; + goto bail; + } + + /* Check that we can write the offset to mmap. */ + if (udata && udata->outlen >= sizeof(__u64)) { + __u64 offset = 0; + + ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (ret) + goto bail_free; + } + + spin_lock_irq(&cq->lock); + /* + * Make sure head and tail are sane since they + * might be user writable. + */ + old_wc = cq->queue; + head = old_wc->head; + if (head > (u32) cq->ibcq.cqe) + head = (u32) cq->ibcq.cqe; + tail = old_wc->tail; + if (tail > (u32) cq->ibcq.cqe) + tail = (u32) cq->ibcq.cqe; + if (head < tail) + n = cq->ibcq.cqe + 1 + head - tail; + else + n = head - tail; + if (unlikely((u32)cqe < n)) { + ret = -EINVAL; + goto bail_unlock; + } + for (n = 0; tail != head; n++) { + if (cq->ip) + wc->uqueue[n] = old_wc->uqueue[tail]; + else + wc->kqueue[n] = old_wc->kqueue[tail]; + if (tail == (u32) cq->ibcq.cqe) + tail = 0; + else + tail++; + } + cq->ibcq.cqe = cqe; + wc->head = n; + wc->tail = 0; + cq->queue = wc; + spin_unlock_irq(&cq->lock); + + vfree(old_wc); + + if (cq->ip) { + struct qib_ibdev *dev = to_idev(ibcq->device); + struct qib_mmap_info *ip = cq->ip; + + qib_update_mmap_info(dev, ip, sz, wc); + + /* + * Return the offset to mmap. + * See qib_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + + spin_lock_irq(&dev->pending_lock); + if (list_empty(&ip->pending_mmaps)) + list_add(&ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + ret = 0; + goto bail; + +bail_unlock: + spin_unlock_irq(&cq->lock); +bail_free: + vfree(wc); +bail: + return ret; +} diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c new file mode 100644 index 00000000..9892456a --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -0,0 +1,908 @@ +/* + * Copyright (c) 2010 QLogic Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This file contains support for diagnostic functions. It is accessed by + * opening the qib_diag device, normally minor number 129. Diagnostic use + * of the QLogic_IB chip may render the chip or board unusable until the + * driver is unloaded, or in some cases, until the system is rebooted. + * + * Accesses to the chip through this interface are not similar to going + * through the /sys/bus/pci resource mmap interface. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "qib.h" +#include "qib_common.h" + +/* + * Each client that opens the diag device must read then write + * offset 0, to prevent lossage from random cat or od. diag_state + * sequences this "handshake". + */ +enum diag_state { UNUSED = 0, OPENED, INIT, READY }; + +/* State for an individual client. PID so children cannot abuse handshake */ +static struct qib_diag_client { + struct qib_diag_client *next; + struct qib_devdata *dd; + pid_t pid; + enum diag_state state; +} *client_pool; + +/* + * Get a client struct. Recycled if possible, else kmalloc. + * Must be called with qib_mutex held + */ +static struct qib_diag_client *get_client(struct qib_devdata *dd) +{ + struct qib_diag_client *dc; + + dc = client_pool; + if (dc) + /* got from pool remove it and use */ + client_pool = dc->next; + else + /* None in pool, alloc and init */ + dc = kmalloc(sizeof *dc, GFP_KERNEL); + + if (dc) { + dc->next = NULL; + dc->dd = dd; + dc->pid = current->pid; + dc->state = OPENED; + } + return dc; +} + +/* + * Return to pool. Must be called with qib_mutex held + */ +static void return_client(struct qib_diag_client *dc) +{ + struct qib_devdata *dd = dc->dd; + struct qib_diag_client *tdc, *rdc; + + rdc = NULL; + if (dc == dd->diag_client) { + dd->diag_client = dc->next; + rdc = dc; + } else { + tdc = dc->dd->diag_client; + while (tdc) { + if (dc == tdc->next) { + tdc->next = dc->next; + rdc = dc; + break; + } + tdc = tdc->next; + } + } + if (rdc) { + rdc->state = UNUSED; + rdc->dd = NULL; + rdc->pid = 0; + rdc->next = client_pool; + client_pool = rdc; + } +} + +static int qib_diag_open(struct inode *in, struct file *fp); +static int qib_diag_release(struct inode *in, struct file *fp); +static ssize_t qib_diag_read(struct file *fp, char __user *data, + size_t count, loff_t *off); +static ssize_t qib_diag_write(struct file *fp, const char __user *data, + size_t count, loff_t *off); + +static const struct file_operations diag_file_ops = { + .owner = THIS_MODULE, + .write = qib_diag_write, + .read = qib_diag_read, + .open = qib_diag_open, + .release = qib_diag_release, + .llseek = default_llseek, +}; + +static atomic_t diagpkt_count = ATOMIC_INIT(0); +static struct cdev *diagpkt_cdev; +static struct device *diagpkt_device; + +static ssize_t qib_diagpkt_write(struct file *fp, const char __user *data, + size_t count, loff_t *off); + +static const struct file_operations diagpkt_file_ops = { + .owner = THIS_MODULE, + .write = qib_diagpkt_write, + .llseek = noop_llseek, +}; + +int qib_diag_add(struct qib_devdata *dd) +{ + char name[16]; + int ret = 0; + + if (atomic_inc_return(&diagpkt_count) == 1) { + ret = qib_cdev_init(QIB_DIAGPKT_MINOR, "ipath_diagpkt", + &diagpkt_file_ops, &diagpkt_cdev, + &diagpkt_device); + if (ret) + goto done; + } + + snprintf(name, sizeof(name), "ipath_diag%d", dd->unit); + ret = qib_cdev_init(QIB_DIAG_MINOR_BASE + dd->unit, name, + &diag_file_ops, &dd->diag_cdev, + &dd->diag_device); +done: + return ret; +} + +static void qib_unregister_observers(struct qib_devdata *dd); + +void qib_diag_remove(struct qib_devdata *dd) +{ + struct qib_diag_client *dc; + + if (atomic_dec_and_test(&diagpkt_count)) + qib_cdev_cleanup(&diagpkt_cdev, &diagpkt_device); + + qib_cdev_cleanup(&dd->diag_cdev, &dd->diag_device); + + /* + * Return all diag_clients of this device. There should be none, + * as we are "guaranteed" that no clients are still open + */ + while (dd->diag_client) + return_client(dd->diag_client); + + /* Now clean up all unused client structs */ + while (client_pool) { + dc = client_pool; + client_pool = dc->next; + kfree(dc); + } + /* Clean up observer list */ + qib_unregister_observers(dd); +} + +/* qib_remap_ioaddr32 - remap an offset into chip address space to __iomem * + * + * @dd: the qlogic_ib device + * @offs: the offset in chip-space + * @cntp: Pointer to max (byte) count for transfer starting at offset + * This returns a u32 __iomem * so it can be used for both 64 and 32-bit + * mapping. It is needed because with the use of PAT for control of + * write-combining, the logically contiguous address-space of the chip + * may be split into virtually non-contiguous spaces, with different + * attributes, which are them mapped to contiguous physical space + * based from the first BAR. + * + * The code below makes the same assumptions as were made in + * init_chip_wc_pat() (qib_init.c), copied here: + * Assumes chip address space looks like: + * - kregs + sregs + cregs + uregs (in any order) + * - piobufs (2K and 4K bufs in either order) + * or: + * - kregs + sregs + cregs (in any order) + * - piobufs (2K and 4K bufs in either order) + * - uregs + * + * If cntp is non-NULL, returns how many bytes from offset can be accessed + * Returns 0 if the offset is not mapped. + */ +static u32 __iomem *qib_remap_ioaddr32(struct qib_devdata *dd, u32 offset, + u32 *cntp) +{ + u32 kreglen; + u32 snd_bottom, snd_lim = 0; + u32 __iomem *krb32 = (u32 __iomem *)dd->kregbase; + u32 __iomem *map = NULL; + u32 cnt = 0; + u32 tot4k, offs4k; + + /* First, simplest case, offset is within the first map. */ + kreglen = (dd->kregend - dd->kregbase) * sizeof(u64); + if (offset < kreglen) { + map = krb32 + (offset / sizeof(u32)); + cnt = kreglen - offset; + goto mapped; + } + + /* + * Next check for user regs, the next most common case, + * and a cheap check because if they are not in the first map + * they are last in chip. + */ + if (dd->userbase) { + /* If user regs mapped, they are after send, so set limit. */ + u32 ulim = (dd->cfgctxts * dd->ureg_align) + dd->uregbase; + if (!dd->piovl15base) + snd_lim = dd->uregbase; + krb32 = (u32 __iomem *)dd->userbase; + if (offset >= dd->uregbase && offset < ulim) { + map = krb32 + (offset - dd->uregbase) / sizeof(u32); + cnt = ulim - offset; + goto mapped; + } + } + + /* + * Lastly, check for offset within Send Buffers. + * This is gnarly because struct devdata is deliberately vague + * about things like 7322 VL15 buffers, and we are not in + * chip-specific code here, so should not make many assumptions. + * The one we _do_ make is that the only chip that has more sndbufs + * than we admit is the 7322, and it has userregs above that, so + * we know the snd_lim. + */ + /* Assume 2K buffers are first. */ + snd_bottom = dd->pio2k_bufbase; + if (snd_lim == 0) { + u32 tot2k = dd->piobcnt2k * ALIGN(dd->piosize2k, dd->palign); + snd_lim = snd_bottom + tot2k; + } + /* If 4k buffers exist, account for them by bumping + * appropriate limit. + */ + tot4k = dd->piobcnt4k * dd->align4k; + offs4k = dd->piobufbase >> 32; + if (dd->piobcnt4k) { + if (snd_bottom > offs4k) + snd_bottom = offs4k; + else { + /* 4k above 2k. Bump snd_lim, if needed*/ + if (!dd->userbase || dd->piovl15base) + snd_lim = offs4k + tot4k; + } + } + /* + * Judgement call: can we ignore the space between SendBuffs and + * UserRegs, where we would like to see vl15 buffs, but not more? + */ + if (offset >= snd_bottom && offset < snd_lim) { + offset -= snd_bottom; + map = (u32 __iomem *)dd->piobase + (offset / sizeof(u32)); + cnt = snd_lim - offset; + } + + if (!map && offs4k && dd->piovl15base) { + snd_lim = offs4k + tot4k + 2 * dd->align4k; + if (offset >= (offs4k + tot4k) && offset < snd_lim) { + map = (u32 __iomem *)dd->piovl15base + + ((offset - (offs4k + tot4k)) / sizeof(u32)); + cnt = snd_lim - offset; + } + } + +mapped: + if (cntp) + *cntp = cnt; + return map; +} + +/* + * qib_read_umem64 - read a 64-bit quantity from the chip into user space + * @dd: the qlogic_ib device + * @uaddr: the location to store the data in user memory + * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore) + * @count: number of bytes to copy (multiple of 32 bits) + * + * This function also localizes all chip memory accesses. + * The copy should be written such that we read full cacheline packets + * from the chip. This is usually used for a single qword + * + * NOTE: This assumes the chip address is 64-bit aligned. + */ +static int qib_read_umem64(struct qib_devdata *dd, void __user *uaddr, + u32 regoffs, size_t count) +{ + const u64 __iomem *reg_addr; + const u64 __iomem *reg_end; + u32 limit; + int ret; + + reg_addr = (const u64 __iomem *)qib_remap_ioaddr32(dd, regoffs, &limit); + if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) { + ret = -EINVAL; + goto bail; + } + if (count >= limit) + count = limit; + reg_end = reg_addr + (count / sizeof(u64)); + + /* not very efficient, but it works for now */ + while (reg_addr < reg_end) { + u64 data = readq(reg_addr); + + if (copy_to_user(uaddr, &data, sizeof(u64))) { + ret = -EFAULT; + goto bail; + } + reg_addr++; + uaddr += sizeof(u64); + } + ret = 0; +bail: + return ret; +} + +/* + * qib_write_umem64 - write a 64-bit quantity to the chip from user space + * @dd: the qlogic_ib device + * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore) + * @uaddr: the source of the data in user memory + * @count: the number of bytes to copy (multiple of 32 bits) + * + * This is usually used for a single qword + * NOTE: This assumes the chip address is 64-bit aligned. + */ + +static int qib_write_umem64(struct qib_devdata *dd, u32 regoffs, + const void __user *uaddr, size_t count) +{ + u64 __iomem *reg_addr; + const u64 __iomem *reg_end; + u32 limit; + int ret; + + reg_addr = (u64 __iomem *)qib_remap_ioaddr32(dd, regoffs, &limit); + if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) { + ret = -EINVAL; + goto bail; + } + if (count >= limit) + count = limit; + reg_end = reg_addr + (count / sizeof(u64)); + + /* not very efficient, but it works for now */ + while (reg_addr < reg_end) { + u64 data; + if (copy_from_user(&data, uaddr, sizeof(data))) { + ret = -EFAULT; + goto bail; + } + writeq(data, reg_addr); + + reg_addr++; + uaddr += sizeof(u64); + } + ret = 0; +bail: + return ret; +} + +/* + * qib_read_umem32 - read a 32-bit quantity from the chip into user space + * @dd: the qlogic_ib device + * @uaddr: the location to store the data in user memory + * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore) + * @count: number of bytes to copy + * + * read 32 bit values, not 64 bit; for memories that only + * support 32 bit reads; usually a single dword. + */ +static int qib_read_umem32(struct qib_devdata *dd, void __user *uaddr, + u32 regoffs, size_t count) +{ + const u32 __iomem *reg_addr; + const u32 __iomem *reg_end; + u32 limit; + int ret; + + reg_addr = qib_remap_ioaddr32(dd, regoffs, &limit); + if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) { + ret = -EINVAL; + goto bail; + } + if (count >= limit) + count = limit; + reg_end = reg_addr + (count / sizeof(u32)); + + /* not very efficient, but it works for now */ + while (reg_addr < reg_end) { + u32 data = readl(reg_addr); + + if (copy_to_user(uaddr, &data, sizeof(data))) { + ret = -EFAULT; + goto bail; + } + + reg_addr++; + uaddr += sizeof(u32); + + } + ret = 0; +bail: + return ret; +} + +/* + * qib_write_umem32 - write a 32-bit quantity to the chip from user space + * @dd: the qlogic_ib device + * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore) + * @uaddr: the source of the data in user memory + * @count: number of bytes to copy + * + * write 32 bit values, not 64 bit; for memories that only + * support 32 bit write; usually a single dword. + */ + +static int qib_write_umem32(struct qib_devdata *dd, u32 regoffs, + const void __user *uaddr, size_t count) +{ + u32 __iomem *reg_addr; + const u32 __iomem *reg_end; + u32 limit; + int ret; + + reg_addr = qib_remap_ioaddr32(dd, regoffs, &limit); + if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) { + ret = -EINVAL; + goto bail; + } + if (count >= limit) + count = limit; + reg_end = reg_addr + (count / sizeof(u32)); + + while (reg_addr < reg_end) { + u32 data; + + if (copy_from_user(&data, uaddr, sizeof(data))) { + ret = -EFAULT; + goto bail; + } + writel(data, reg_addr); + + reg_addr++; + uaddr += sizeof(u32); + } + ret = 0; +bail: + return ret; +} + +static int qib_diag_open(struct inode *in, struct file *fp) +{ + int unit = iminor(in) - QIB_DIAG_MINOR_BASE; + struct qib_devdata *dd; + struct qib_diag_client *dc; + int ret; + + mutex_lock(&qib_mutex); + + dd = qib_lookup(unit); + + if (dd == NULL || !(dd->flags & QIB_PRESENT) || + !dd->kregbase) { + ret = -ENODEV; + goto bail; + } + + dc = get_client(dd); + if (!dc) { + ret = -ENOMEM; + goto bail; + } + dc->next = dd->diag_client; + dd->diag_client = dc; + fp->private_data = dc; + ret = 0; +bail: + mutex_unlock(&qib_mutex); + + return ret; +} + +/** + * qib_diagpkt_write - write an IB packet + * @fp: the diag data device file pointer + * @data: qib_diag_pkt structure saying where to get the packet + * @count: size of data to write + * @off: unused by this code + */ +static ssize_t qib_diagpkt_write(struct file *fp, + const char __user *data, + size_t count, loff_t *off) +{ + u32 __iomem *piobuf; + u32 plen, clen, pbufn; + struct qib_diag_xpkt dp; + u32 *tmpbuf = NULL; + struct qib_devdata *dd; + struct qib_pportdata *ppd; + ssize_t ret = 0; + + if (count != sizeof(dp)) { + ret = -EINVAL; + goto bail; + } + if (copy_from_user(&dp, data, sizeof(dp))) { + ret = -EFAULT; + goto bail; + } + + dd = qib_lookup(dp.unit); + if (!dd || !(dd->flags & QIB_PRESENT) || !dd->kregbase) { + ret = -ENODEV; + goto bail; + } + if (!(dd->flags & QIB_INITTED)) { + /* no hardware, freeze, etc. */ + ret = -ENODEV; + goto bail; + } + + if (dp.version != _DIAG_XPKT_VERS) { + qib_dev_err(dd, "Invalid version %u for diagpkt_write\n", + dp.version); + ret = -EINVAL; + goto bail; + } + /* send count must be an exact number of dwords */ + if (dp.len & 3) { + ret = -EINVAL; + goto bail; + } + if (!dp.port || dp.port > dd->num_pports) { + ret = -EINVAL; + goto bail; + } + ppd = &dd->pport[dp.port - 1]; + + /* need total length before first word written */ + /* +1 word is for the qword padding */ + plen = sizeof(u32) + dp.len; + clen = dp.len >> 2; + + if ((plen + 4) > ppd->ibmaxlen) { + ret = -EINVAL; + goto bail; /* before writing pbc */ + } + tmpbuf = vmalloc(plen); + if (!tmpbuf) { + qib_devinfo(dd->pcidev, "Unable to allocate tmp buffer, " + "failing\n"); + ret = -ENOMEM; + goto bail; + } + + if (copy_from_user(tmpbuf, + (const void __user *) (unsigned long) dp.data, + dp.len)) { + ret = -EFAULT; + goto bail; + } + + plen >>= 2; /* in dwords */ + + if (dp.pbc_wd == 0) + dp.pbc_wd = plen; + + piobuf = dd->f_getsendbuf(ppd, dp.pbc_wd, &pbufn); + if (!piobuf) { + ret = -EBUSY; + goto bail; + } + /* disarm it just to be extra sure */ + dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(pbufn)); + + /* disable header check on pbufn for this packet */ + dd->f_txchk_change(dd, pbufn, 1, TXCHK_CHG_TYPE_DIS1, NULL); + + writeq(dp.pbc_wd, piobuf); + /* + * Copy all but the trigger word, then flush, so it's written + * to chip before trigger word, then write trigger word, then + * flush again, so packet is sent. + */ + if (dd->flags & QIB_PIO_FLUSH_WC) { + qib_flush_wc(); + qib_pio_copy(piobuf + 2, tmpbuf, clen - 1); + qib_flush_wc(); + __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + } else + qib_pio_copy(piobuf + 2, tmpbuf, clen); + + if (dd->flags & QIB_USE_SPCL_TRIG) { + u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023; + + qib_flush_wc(); + __raw_writel(0xaebecede, piobuf + spcl_off); + } + + /* + * Ensure buffer is written to the chip, then re-enable + * header checks (if supported by chip). The txchk + * code will ensure seen by chip before returning. + */ + qib_flush_wc(); + qib_sendbuf_done(dd, pbufn); + dd->f_txchk_change(dd, pbufn, 1, TXCHK_CHG_TYPE_ENAB1, NULL); + + ret = sizeof(dp); + +bail: + vfree(tmpbuf); + return ret; +} + +static int qib_diag_release(struct inode *in, struct file *fp) +{ + mutex_lock(&qib_mutex); + return_client(fp->private_data); + fp->private_data = NULL; + mutex_unlock(&qib_mutex); + return 0; +} + +/* + * Chip-specific code calls to register its interest in + * a specific range. + */ +struct diag_observer_list_elt { + struct diag_observer_list_elt *next; + const struct diag_observer *op; +}; + +int qib_register_observer(struct qib_devdata *dd, + const struct diag_observer *op) +{ + struct diag_observer_list_elt *olp; + int ret = -EINVAL; + + if (!dd || !op) + goto bail; + ret = -ENOMEM; + olp = vmalloc(sizeof *olp); + if (!olp) { + printk(KERN_ERR QIB_DRV_NAME ": vmalloc for observer failed\n"); + goto bail; + } + if (olp) { + unsigned long flags; + + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + olp->op = op; + olp->next = dd->diag_observer_list; + dd->diag_observer_list = olp; + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); + ret = 0; + } +bail: + return ret; +} + +/* Remove all registered observers when device is closed */ +static void qib_unregister_observers(struct qib_devdata *dd) +{ + struct diag_observer_list_elt *olp; + unsigned long flags; + + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + olp = dd->diag_observer_list; + while (olp) { + /* Pop one observer, let go of lock */ + dd->diag_observer_list = olp->next; + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); + vfree(olp); + /* try again. */ + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + olp = dd->diag_observer_list; + } + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); +} + +/* + * Find the observer, if any, for the specified address. Initial implementation + * is simple stack of observers. This must be called with diag transaction + * lock held. + */ +static const struct diag_observer *diag_get_observer(struct qib_devdata *dd, + u32 addr) +{ + struct diag_observer_list_elt *olp; + const struct diag_observer *op = NULL; + + olp = dd->diag_observer_list; + while (olp) { + op = olp->op; + if (addr >= op->bottom && addr <= op->top) + break; + olp = olp->next; + } + if (!olp) + op = NULL; + + return op; +} + +static ssize_t qib_diag_read(struct file *fp, char __user *data, + size_t count, loff_t *off) +{ + struct qib_diag_client *dc = fp->private_data; + struct qib_devdata *dd = dc->dd; + void __iomem *kreg_base; + ssize_t ret; + + if (dc->pid != current->pid) { + ret = -EPERM; + goto bail; + } + + kreg_base = dd->kregbase; + + if (count == 0) + ret = 0; + else if ((count % 4) || (*off % 4)) + /* address or length is not 32-bit aligned, hence invalid */ + ret = -EINVAL; + else if (dc->state < READY && (*off || count != 8)) + ret = -EINVAL; /* prevent cat /dev/qib_diag* */ + else { + unsigned long flags; + u64 data64 = 0; + int use_32; + const struct diag_observer *op; + + use_32 = (count % 8) || (*off % 8); + ret = -1; + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + /* + * Check for observer on this address range. + * we only support a single 32 or 64-bit read + * via observer, currently. + */ + op = diag_get_observer(dd, *off); + if (op) { + u32 offset = *off; + ret = op->hook(dd, op, offset, &data64, 0, use_32); + } + /* + * We need to release lock before any copy_to_user(), + * whether implicit in qib_read_umem* or explicit below. + */ + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); + if (!op) { + if (use_32) + /* + * Address or length is not 64-bit aligned; + * do 32-bit rd + */ + ret = qib_read_umem32(dd, data, (u32) *off, + count); + else + ret = qib_read_umem64(dd, data, (u32) *off, + count); + } else if (ret == count) { + /* Below finishes case where observer existed */ + ret = copy_to_user(data, &data64, use_32 ? + sizeof(u32) : sizeof(u64)); + if (ret) + ret = -EFAULT; + } + } + + if (ret >= 0) { + *off += count; + ret = count; + if (dc->state == OPENED) + dc->state = INIT; + } +bail: + return ret; +} + +static ssize_t qib_diag_write(struct file *fp, const char __user *data, + size_t count, loff_t *off) +{ + struct qib_diag_client *dc = fp->private_data; + struct qib_devdata *dd = dc->dd; + void __iomem *kreg_base; + ssize_t ret; + + if (dc->pid != current->pid) { + ret = -EPERM; + goto bail; + } + + kreg_base = dd->kregbase; + + if (count == 0) + ret = 0; + else if ((count % 4) || (*off % 4)) + /* address or length is not 32-bit aligned, hence invalid */ + ret = -EINVAL; + else if (dc->state < READY && + ((*off || count != 8) || dc->state != INIT)) + /* No writes except second-step of init seq */ + ret = -EINVAL; /* before any other write allowed */ + else { + unsigned long flags; + const struct diag_observer *op = NULL; + int use_32 = (count % 8) || (*off % 8); + + /* + * Check for observer on this address range. + * We only support a single 32 or 64-bit write + * via observer, currently. This helps, because + * we would otherwise have to jump through hoops + * to make "diag transaction" meaningful when we + * cannot do a copy_from_user while holding the lock. + */ + if (count == 4 || count == 8) { + u64 data64; + u32 offset = *off; + ret = copy_from_user(&data64, data, count); + if (ret) { + ret = -EFAULT; + goto bail; + } + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + op = diag_get_observer(dd, *off); + if (op) + ret = op->hook(dd, op, offset, &data64, ~0Ull, + use_32); + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); + } + + if (!op) { + if (use_32) + /* + * Address or length is not 64-bit aligned; + * do 32-bit write + */ + ret = qib_write_umem32(dd, (u32) *off, data, + count); + else + ret = qib_write_umem64(dd, (u32) *off, data, + count); + } + } + + if (ret >= 0) { + *off += count; + ret = count; + if (dc->state == INIT) + dc->state = READY; /* all read/write OK now */ + } +bail: + return ret; +} diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c new file mode 100644 index 00000000..2920bb39 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_dma.c @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2006, 2009, 2010 QLogic, Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include + +#include "qib_verbs.h" + +#define BAD_DMA_ADDRESS ((u64) 0) + +/* + * The following functions implement driver specific replacements + * for the ib_dma_*() functions. + * + * These functions return kernel virtual addresses instead of + * device bus addresses since the driver uses the CPU to copy + * data instead of using hardware DMA. + */ + +static int qib_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + return dma_addr == BAD_DMA_ADDRESS; +} + +static u64 qib_dma_map_single(struct ib_device *dev, void *cpu_addr, + size_t size, enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); + return (u64) cpu_addr; +} + +static void qib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} + +static u64 qib_dma_map_page(struct ib_device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + u64 addr; + + BUG_ON(!valid_dma_direction(direction)); + + if (offset + size > PAGE_SIZE) { + addr = BAD_DMA_ADDRESS; + goto done; + } + + addr = (u64) page_address(page); + if (addr) + addr += offset; + /* TODO: handle highmem pages */ + +done: + return addr; +} + +static void qib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} + +static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) +{ + struct scatterlist *sg; + u64 addr; + int i; + int ret = nents; + + BUG_ON(!valid_dma_direction(direction)); + + for_each_sg(sgl, sg, nents, i) { + addr = (u64) page_address(sg_page(sg)); + /* TODO: handle highmem pages */ + if (!addr) { + ret = 0; + break; + } + } + return ret; +} + +static void qib_unmap_sg(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} + +static u64 qib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) +{ + u64 addr = (u64) page_address(sg_page(sg)); + + if (addr) + addr += sg->offset; + return addr; +} + +static unsigned int qib_sg_dma_len(struct ib_device *dev, + struct scatterlist *sg) +{ + return sg->length; +} + +static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr, + size_t size, enum dma_data_direction dir) +{ +} + +static void qib_sync_single_for_device(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ +} + +static void *qib_dma_alloc_coherent(struct ib_device *dev, size_t size, + u64 *dma_handle, gfp_t flag) +{ + struct page *p; + void *addr = NULL; + + p = alloc_pages(flag, get_order(size)); + if (p) + addr = page_address(p); + if (dma_handle) + *dma_handle = (u64) addr; + return addr; +} + +static void qib_dma_free_coherent(struct ib_device *dev, size_t size, + void *cpu_addr, u64 dma_handle) +{ + free_pages((unsigned long) cpu_addr, get_order(size)); +} + +struct ib_dma_mapping_ops qib_dma_mapping_ops = { + .mapping_error = qib_mapping_error, + .map_single = qib_dma_map_single, + .unmap_single = qib_dma_unmap_single, + .map_page = qib_dma_map_page, + .unmap_page = qib_dma_unmap_page, + .map_sg = qib_map_sg, + .unmap_sg = qib_unmap_sg, + .dma_address = qib_sg_dma_address, + .dma_len = qib_sg_dma_len, + .sync_single_for_cpu = qib_sync_single_for_cpu, + .sync_single_for_device = qib_sync_single_for_device, + .alloc_coherent = qib_dma_alloc_coherent, + .free_coherent = qib_dma_free_coherent +}; diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c new file mode 100644 index 00000000..6fc9365b --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -0,0 +1,810 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "qib.h" + +/* + * The size has to be longer than this string, so we can append + * board/chip information to it in the init code. + */ +const char ib_qib_version[] = QIB_IDSTR "\n"; + +DEFINE_SPINLOCK(qib_devs_lock); +LIST_HEAD(qib_dev_list); +DEFINE_MUTEX(qib_mutex); /* general driver use */ + +unsigned qib_ibmtu; +module_param_named(ibmtu, qib_ibmtu, uint, S_IRUGO); +MODULE_PARM_DESC(ibmtu, "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096"); + +unsigned qib_compat_ddr_negotiate = 1; +module_param_named(compat_ddr_negotiate, qib_compat_ddr_negotiate, uint, + S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(compat_ddr_negotiate, + "Attempt pre-IBTA 1.2 DDR speed negotiation"); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("QLogic "); +MODULE_DESCRIPTION("QLogic IB driver"); + +/* + * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our + * PIO send buffers. This is well beyond anything currently + * defined in the InfiniBand spec. + */ +#define QIB_PIO_MAXIBHDR 128 + +/* + * QIB_MAX_PKT_RCV is the max # if packets processed per receive interrupt. + */ +#define QIB_MAX_PKT_RECV 64 + +struct qlogic_ib_stats qib_stats; + +const char *qib_get_unit_name(int unit) +{ + static char iname[16]; + + snprintf(iname, sizeof iname, "infinipath%u", unit); + return iname; +} + +/* + * Return count of units with at least one port ACTIVE. + */ +int qib_count_active_units(void) +{ + struct qib_devdata *dd; + struct qib_pportdata *ppd; + unsigned long flags; + int pidx, nunits_active = 0; + + spin_lock_irqsave(&qib_devs_lock, flags); + list_for_each_entry(dd, &qib_dev_list, list) { + if (!(dd->flags & QIB_PRESENT) || !dd->kregbase) + continue; + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT | + QIBL_LINKARMED | QIBL_LINKACTIVE))) { + nunits_active++; + break; + } + } + } + spin_unlock_irqrestore(&qib_devs_lock, flags); + return nunits_active; +} + +/* + * Return count of all units, optionally return in arguments + * the number of usable (present) units, and the number of + * ports that are up. + */ +int qib_count_units(int *npresentp, int *nupp) +{ + int nunits = 0, npresent = 0, nup = 0; + struct qib_devdata *dd; + unsigned long flags; + int pidx; + struct qib_pportdata *ppd; + + spin_lock_irqsave(&qib_devs_lock, flags); + + list_for_each_entry(dd, &qib_dev_list, list) { + nunits++; + if ((dd->flags & QIB_PRESENT) && dd->kregbase) + npresent++; + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT | + QIBL_LINKARMED | QIBL_LINKACTIVE))) + nup++; + } + } + + spin_unlock_irqrestore(&qib_devs_lock, flags); + + if (npresentp) + *npresentp = npresent; + if (nupp) + *nupp = nup; + + return nunits; +} + +/** + * qib_wait_linkstate - wait for an IB link state change to occur + * @dd: the qlogic_ib device + * @state: the state to wait for + * @msecs: the number of milliseconds to wait + * + * wait up to msecs milliseconds for IB link state change to occur for + * now, take the easy polling route. Currently used only by + * qib_set_linkstate. Returns 0 if state reached, otherwise + * -ETIMEDOUT state can have multiple states set, for any of several + * transitions. + */ +int qib_wait_linkstate(struct qib_pportdata *ppd, u32 state, int msecs) +{ + int ret; + unsigned long flags; + + spin_lock_irqsave(&ppd->lflags_lock, flags); + if (ppd->state_wanted) { + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + ret = -EBUSY; + goto bail; + } + ppd->state_wanted = state; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + wait_event_interruptible_timeout(ppd->state_wait, + (ppd->lflags & state), + msecs_to_jiffies(msecs)); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->state_wanted = 0; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + + if (!(ppd->lflags & state)) + ret = -ETIMEDOUT; + else + ret = 0; +bail: + return ret; +} + +int qib_set_linkstate(struct qib_pportdata *ppd, u8 newstate) +{ + u32 lstate; + int ret; + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + + switch (newstate) { + case QIB_IB_LINKDOWN_ONLY: + dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE, + IB_LINKCMD_DOWN | IB_LINKINITCMD_NOP); + /* don't wait */ + ret = 0; + goto bail; + + case QIB_IB_LINKDOWN: + dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE, + IB_LINKCMD_DOWN | IB_LINKINITCMD_POLL); + /* don't wait */ + ret = 0; + goto bail; + + case QIB_IB_LINKDOWN_SLEEP: + dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE, + IB_LINKCMD_DOWN | IB_LINKINITCMD_SLEEP); + /* don't wait */ + ret = 0; + goto bail; + + case QIB_IB_LINKDOWN_DISABLE: + dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE, + IB_LINKCMD_DOWN | IB_LINKINITCMD_DISABLE); + /* don't wait */ + ret = 0; + goto bail; + + case QIB_IB_LINKARM: + if (ppd->lflags & QIBL_LINKARMED) { + ret = 0; + goto bail; + } + if (!(ppd->lflags & (QIBL_LINKINIT | QIBL_LINKACTIVE))) { + ret = -EINVAL; + goto bail; + } + /* + * Since the port can be ACTIVE when we ask for ARMED, + * clear QIBL_LINKV so we can wait for a transition. + * If the link isn't ARMED, then something else happened + * and there is no point waiting for ARMED. + */ + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_LINKV; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE, + IB_LINKCMD_ARMED | IB_LINKINITCMD_NOP); + lstate = QIBL_LINKV; + break; + + case QIB_IB_LINKACTIVE: + if (ppd->lflags & QIBL_LINKACTIVE) { + ret = 0; + goto bail; + } + if (!(ppd->lflags & QIBL_LINKARMED)) { + ret = -EINVAL; + goto bail; + } + dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE, + IB_LINKCMD_ACTIVE | IB_LINKINITCMD_NOP); + lstate = QIBL_LINKACTIVE; + break; + + default: + ret = -EINVAL; + goto bail; + } + ret = qib_wait_linkstate(ppd, lstate, 10); + +bail: + return ret; +} + +/* + * Get address of eager buffer from it's index (allocated in chunks, not + * contiguous). + */ +static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail) +{ + const u32 chunk = etail >> rcd->rcvegrbufs_perchunk_shift; + const u32 idx = etail & ((u32)rcd->rcvegrbufs_perchunk - 1); + + return rcd->rcvegrbuf[chunk] + (idx << rcd->dd->rcvegrbufsize_shift); +} + +/* + * Returns 1 if error was a CRC, else 0. + * Needed for some chip's synthesized error counters. + */ +static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, + u32 ctxt, u32 eflags, u32 l, u32 etail, + __le32 *rhf_addr, struct qib_message_header *rhdr) +{ + u32 ret = 0; + + if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR)) + ret = 1; + else if (eflags == QLOGIC_IB_RHF_H_TIDERR) { + /* For TIDERR and RC QPs premptively schedule a NAK */ + struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr; + struct qib_other_headers *ohdr = NULL; + struct qib_ibport *ibp = &ppd->ibport_data; + struct qib_qp *qp = NULL; + u32 tlen = qib_hdrget_length_in_bytes(rhf_addr); + u16 lid = be16_to_cpu(hdr->lrh[1]); + int lnh = be16_to_cpu(hdr->lrh[0]) & 3; + u32 qp_num; + u32 opcode; + u32 psn; + int diff; + + /* Sanity check packet */ + if (tlen < 24) + goto drop; + + if (lid < QIB_MULTICAST_LID_BASE) { + lid &= ~((1 << ppd->lmc) - 1); + if (unlikely(lid != ppd->lid)) + goto drop; + } + + /* Check for GRH */ + if (lnh == QIB_LRH_BTH) + ohdr = &hdr->u.oth; + else if (lnh == QIB_LRH_GRH) { + u32 vtf; + + ohdr = &hdr->u.l.oth; + if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) + goto drop; + vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow); + if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) + goto drop; + } else + goto drop; + + /* Get opcode and PSN from packet */ + opcode = be32_to_cpu(ohdr->bth[0]); + opcode >>= 24; + psn = be32_to_cpu(ohdr->bth[2]); + + /* Get the destination QP number. */ + qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK; + if (qp_num != QIB_MULTICAST_QPN) { + int ruc_res; + qp = qib_lookup_qpn(ibp, qp_num); + if (!qp) + goto drop; + + /* + * Handle only RC QPs - for other QP types drop error + * packet. + */ + spin_lock(&qp->r_lock); + + /* Check for valid receive state. */ + if (!(ib_qib_state_ops[qp->state] & + QIB_PROCESS_RECV_OK)) { + ibp->n_pkt_drops++; + goto unlock; + } + + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + ruc_res = + qib_ruc_check_hdr( + ibp, hdr, + lnh == QIB_LRH_GRH, + qp, + be32_to_cpu(ohdr->bth[0])); + if (ruc_res) + goto unlock; + + /* Only deal with RDMA Writes for now */ + if (opcode < + IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) { + diff = qib_cmp24(psn, qp->r_psn); + if (!qp->r_nak_state && diff >= 0) { + ibp->n_rc_seqnak++; + qp->r_nak_state = + IB_NAK_PSN_ERROR; + /* Use the expected PSN. */ + qp->r_ack_psn = qp->r_psn; + /* + * Wait to send the sequence + * NAK until all packets + * in the receive queue have + * been processed. + * Otherwise, we end up + * propagating congestion. + */ + if (list_empty(&qp->rspwait)) { + qp->r_flags |= + QIB_R_RSP_NAK; + atomic_inc( + &qp->refcount); + list_add_tail( + &qp->rspwait, + &rcd->qp_wait_list); + } + } /* Out of sequence NAK */ + } /* QP Request NAKs */ + break; + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_UD: + case IB_QPT_UC: + default: + /* For now don't handle any other QP types */ + break; + } + +unlock: + spin_unlock(&qp->r_lock); + /* + * Notify qib_destroy_qp() if it is waiting + * for us to finish. + */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } /* Unicast QP */ + } /* Valid packet with TIDErr */ + +drop: + return ret; +} + +/* + * qib_kreceive - receive a packet + * @rcd: the qlogic_ib context + * @llic: gets count of good packets needed to clear lli, + * (used with chips that need need to track crcs for lli) + * + * called from interrupt handler for errors or receive interrupt + * Returns number of CRC error packets, needed by some chips for + * local link integrity tracking. crcs are adjusted down by following + * good packets, if any, and count of good packets is also tracked. + */ +u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts) +{ + struct qib_devdata *dd = rcd->dd; + struct qib_pportdata *ppd = rcd->ppd; + __le32 *rhf_addr; + void *ebuf; + const u32 rsize = dd->rcvhdrentsize; /* words */ + const u32 maxcnt = dd->rcvhdrcnt * rsize; /* words */ + u32 etail = -1, l, hdrqtail; + struct qib_message_header *hdr; + u32 eflags, etype, tlen, i = 0, updegr = 0, crcs = 0; + int last; + u64 lval; + struct qib_qp *qp, *nqp; + + l = rcd->head; + rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset; + if (dd->flags & QIB_NODMA_RTAIL) { + u32 seq = qib_hdrget_seq(rhf_addr); + if (seq != rcd->seq_cnt) + goto bail; + hdrqtail = 0; + } else { + hdrqtail = qib_get_rcvhdrtail(rcd); + if (l == hdrqtail) + goto bail; + smp_rmb(); /* prevent speculative reads of dma'ed hdrq */ + } + + for (last = 0, i = 1; !last; i += !last) { + hdr = dd->f_get_msgheader(dd, rhf_addr); + eflags = qib_hdrget_err_flags(rhf_addr); + etype = qib_hdrget_rcv_type(rhf_addr); + /* total length */ + tlen = qib_hdrget_length_in_bytes(rhf_addr); + ebuf = NULL; + if ((dd->flags & QIB_NODMA_RTAIL) ? + qib_hdrget_use_egr_buf(rhf_addr) : + (etype != RCVHQ_RCV_TYPE_EXPECTED)) { + etail = qib_hdrget_index(rhf_addr); + updegr = 1; + if (tlen > sizeof(*hdr) || + etype >= RCVHQ_RCV_TYPE_NON_KD) + ebuf = qib_get_egrbuf(rcd, etail); + } + if (!eflags) { + u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2; + + if (lrh_len != tlen) { + qib_stats.sps_lenerrs++; + goto move_along; + } + } + if (etype == RCVHQ_RCV_TYPE_NON_KD && !eflags && + ebuf == NULL && + tlen > (dd->rcvhdrentsize - 2 + 1 - + qib_hdrget_offset(rhf_addr)) << 2) { + goto move_along; + } + + /* + * Both tiderr and qibhdrerr are set for all plain IB + * packets; only qibhdrerr should be set. + */ + if (unlikely(eflags)) + crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l, + etail, rhf_addr, hdr); + else if (etype == RCVHQ_RCV_TYPE_NON_KD) { + qib_ib_rcv(rcd, hdr, ebuf, tlen); + if (crcs) + crcs--; + else if (llic && *llic) + --*llic; + } +move_along: + l += rsize; + if (l >= maxcnt) + l = 0; + if (i == QIB_MAX_PKT_RECV) + last = 1; + + rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset; + if (dd->flags & QIB_NODMA_RTAIL) { + u32 seq = qib_hdrget_seq(rhf_addr); + + if (++rcd->seq_cnt > 13) + rcd->seq_cnt = 1; + if (seq != rcd->seq_cnt) + last = 1; + } else if (l == hdrqtail) + last = 1; + /* + * Update head regs etc., every 16 packets, if not last pkt, + * to help prevent rcvhdrq overflows, when many packets + * are processed and queue is nearly full. + * Don't request an interrupt for intermediate updates. + */ + lval = l; + if (!last && !(i & 0xf)) { + dd->f_update_usrhead(rcd, lval, updegr, etail, i); + updegr = 0; + } + } + /* + * Notify qib_destroy_qp() if it is waiting + * for lookaside_qp to finish. + */ + if (rcd->lookaside_qp) { + if (atomic_dec_and_test(&rcd->lookaside_qp->refcount)) + wake_up(&rcd->lookaside_qp->wait); + rcd->lookaside_qp = NULL; + } + + rcd->head = l; + rcd->pkt_count += i; + + /* + * Iterate over all QPs waiting to respond. + * The list won't change since the IRQ is only run on one CPU. + */ + list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) { + list_del_init(&qp->rspwait); + if (qp->r_flags & QIB_R_RSP_NAK) { + qp->r_flags &= ~QIB_R_RSP_NAK; + qib_send_rc_ack(qp); + } + if (qp->r_flags & QIB_R_RSP_SEND) { + unsigned long flags; + + qp->r_flags &= ~QIB_R_RSP_SEND; + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_qib_state_ops[qp->state] & + QIB_PROCESS_OR_FLUSH_SEND) + qib_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + } + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } + +bail: + /* Report number of packets consumed */ + if (npkts) + *npkts = i; + + /* + * Always write head at end, and setup rcv interrupt, even + * if no packets were processed. + */ + lval = (u64)rcd->head | dd->rhdrhead_intr_off; + dd->f_update_usrhead(rcd, lval, updegr, etail, i); + return crcs; +} + +/** + * qib_set_mtu - set the MTU + * @ppd: the perport data + * @arg: the new MTU + * + * We can handle "any" incoming size, the issue here is whether we + * need to restrict our outgoing size. For now, we don't do any + * sanity checking on this, and we don't deal with what happens to + * programs that are already running when the size changes. + * NOTE: changing the MTU will usually cause the IBC to go back to + * link INIT state... + */ +int qib_set_mtu(struct qib_pportdata *ppd, u16 arg) +{ + u32 piosize; + int ret, chk; + + if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && + arg != 4096) { + ret = -EINVAL; + goto bail; + } + chk = ib_mtu_enum_to_int(qib_ibmtu); + if (chk > 0 && arg > chk) { + ret = -EINVAL; + goto bail; + } + + piosize = ppd->ibmaxlen; + ppd->ibmtu = arg; + + if (arg >= (piosize - QIB_PIO_MAXIBHDR)) { + /* Only if it's not the initial value (or reset to it) */ + if (piosize != ppd->init_ibmaxlen) { + if (arg > piosize && arg <= ppd->init_ibmaxlen) + piosize = ppd->init_ibmaxlen - 2 * sizeof(u32); + ppd->ibmaxlen = piosize; + } + } else if ((arg + QIB_PIO_MAXIBHDR) != ppd->ibmaxlen) { + piosize = arg + QIB_PIO_MAXIBHDR - 2 * sizeof(u32); + ppd->ibmaxlen = piosize; + } + + ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_MTU, 0); + + ret = 0; + +bail: + return ret; +} + +int qib_set_lid(struct qib_pportdata *ppd, u32 lid, u8 lmc) +{ + struct qib_devdata *dd = ppd->dd; + ppd->lid = lid; + ppd->lmc = lmc; + + dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LIDLMC, + lid | (~((1U << lmc) - 1)) << 16); + + qib_devinfo(dd->pcidev, "IB%u:%u got a lid: 0x%x\n", + dd->unit, ppd->port, lid); + + return 0; +} + +/* + * Following deal with the "obviously simple" task of overriding the state + * of the LEDS, which normally indicate link physical and logical status. + * The complications arise in dealing with different hardware mappings + * and the board-dependent routine being called from interrupts. + * and then there's the requirement to _flash_ them. + */ +#define LED_OVER_FREQ_SHIFT 8 +#define LED_OVER_FREQ_MASK (0xFF<dd; + int timeoff; + int ph_idx; + + if (!(dd->flags & QIB_INITTED)) + return; + + ph_idx = ppd->led_override_phase++ & 1; + ppd->led_override = ppd->led_override_vals[ph_idx]; + timeoff = ppd->led_override_timeoff; + + dd->f_setextled(ppd, 1); + /* + * don't re-fire the timer if user asked for it to be off; we let + * it fire one more time after they turn it off to simplify + */ + if (ppd->led_override_vals[0] || ppd->led_override_vals[1]) + mod_timer(&ppd->led_override_timer, jiffies + timeoff); +} + +void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val) +{ + struct qib_devdata *dd = ppd->dd; + int timeoff, freq; + + if (!(dd->flags & QIB_INITTED)) + return; + + /* First check if we are blinking. If not, use 1HZ polling */ + timeoff = HZ; + freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT; + + if (freq) { + /* For blink, set each phase from one nybble of val */ + ppd->led_override_vals[0] = val & 0xF; + ppd->led_override_vals[1] = (val >> 4) & 0xF; + timeoff = (HZ << 4)/freq; + } else { + /* Non-blink set both phases the same. */ + ppd->led_override_vals[0] = val & 0xF; + ppd->led_override_vals[1] = val & 0xF; + } + ppd->led_override_timeoff = timeoff; + + /* + * If the timer has not already been started, do so. Use a "quick" + * timeout so the function will be called soon, to look at our request. + */ + if (atomic_inc_return(&ppd->led_override_timer_active) == 1) { + /* Need to start timer */ + init_timer(&ppd->led_override_timer); + ppd->led_override_timer.function = qib_run_led_override; + ppd->led_override_timer.data = (unsigned long) ppd; + ppd->led_override_timer.expires = jiffies + 1; + add_timer(&ppd->led_override_timer); + } else { + if (ppd->led_override_vals[0] || ppd->led_override_vals[1]) + mod_timer(&ppd->led_override_timer, jiffies + 1); + atomic_dec(&ppd->led_override_timer_active); + } +} + +/** + * qib_reset_device - reset the chip if possible + * @unit: the device to reset + * + * Whether or not reset is successful, we attempt to re-initialize the chip + * (that is, much like a driver unload/reload). We clear the INITTED flag + * so that the various entry points will fail until we reinitialize. For + * now, we only allow this if no user contexts are open that use chip resources + */ +int qib_reset_device(int unit) +{ + int ret, i; + struct qib_devdata *dd = qib_lookup(unit); + struct qib_pportdata *ppd; + unsigned long flags; + int pidx; + + if (!dd) { + ret = -ENODEV; + goto bail; + } + + qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit); + + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) { + qib_devinfo(dd->pcidev, "Invalid unit number %u or " + "not initialized or not present\n", unit); + ret = -ENXIO; + goto bail; + } + + spin_lock_irqsave(&dd->uctxt_lock, flags); + if (dd->rcd) + for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) { + if (!dd->rcd[i] || !dd->rcd[i]->cnt) + continue; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + ret = -EBUSY; + goto bail; + } + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (atomic_read(&ppd->led_override_timer_active)) { + /* Need to stop LED timer, _then_ shut off LEDs */ + del_timer_sync(&ppd->led_override_timer); + atomic_set(&ppd->led_override_timer_active, 0); + } + + /* Shut off LEDs after we are sure timer is not running */ + ppd->led_override = LED_OVER_BOTH_OFF; + dd->f_setextled(ppd, 0); + if (dd->flags & QIB_HAS_SEND_DMA) + qib_teardown_sdma(ppd); + } + + ret = dd->f_reset(dd); + if (ret == 1) + ret = qib_init(dd, 1); + else + ret = -EAGAIN; + if (ret) + qib_dev_err(dd, "Reinitialize unit %u after " + "reset failed with %d\n", unit, ret); + else + qib_devinfo(dd->pcidev, "Reinitialized unit %u after " + "resetting\n", unit); + +bail: + return ret; +} diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c new file mode 100644 index 00000000..92d9cfe9 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_eeprom.c @@ -0,0 +1,451 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "qib.h" + +/* + * Functions specific to the serial EEPROM on cards handled by ib_qib. + * The actual serail interface code is in qib_twsi.c. This file is a client + */ + +/** + * qib_eeprom_read - receives bytes from the eeprom via I2C + * @dd: the qlogic_ib device + * @eeprom_offset: address to read from + * @buffer: where to store result + * @len: number of bytes to receive + */ +int qib_eeprom_read(struct qib_devdata *dd, u8 eeprom_offset, + void *buff, int len) +{ + int ret; + + ret = mutex_lock_interruptible(&dd->eep_lock); + if (!ret) { + ret = qib_twsi_reset(dd); + if (ret) + qib_dev_err(dd, "EEPROM Reset for read failed\n"); + else + ret = qib_twsi_blk_rd(dd, dd->twsi_eeprom_dev, + eeprom_offset, buff, len); + mutex_unlock(&dd->eep_lock); + } + + return ret; +} + +/* + * Actually update the eeprom, first doing write enable if + * needed, then restoring write enable state. + * Must be called with eep_lock held + */ +static int eeprom_write_with_enable(struct qib_devdata *dd, u8 offset, + const void *buf, int len) +{ + int ret, pwen; + + pwen = dd->f_eeprom_wen(dd, 1); + ret = qib_twsi_reset(dd); + if (ret) + qib_dev_err(dd, "EEPROM Reset for write failed\n"); + else + ret = qib_twsi_blk_wr(dd, dd->twsi_eeprom_dev, + offset, buf, len); + dd->f_eeprom_wen(dd, pwen); + return ret; +} + +/** + * qib_eeprom_write - writes data to the eeprom via I2C + * @dd: the qlogic_ib device + * @eeprom_offset: where to place data + * @buffer: data to write + * @len: number of bytes to write + */ +int qib_eeprom_write(struct qib_devdata *dd, u8 eeprom_offset, + const void *buff, int len) +{ + int ret; + + ret = mutex_lock_interruptible(&dd->eep_lock); + if (!ret) { + ret = eeprom_write_with_enable(dd, eeprom_offset, buff, len); + mutex_unlock(&dd->eep_lock); + } + + return ret; +} + +static u8 flash_csum(struct qib_flash *ifp, int adjust) +{ + u8 *ip = (u8 *) ifp; + u8 csum = 0, len; + + /* + * Limit length checksummed to max length of actual data. + * Checksum of erased eeprom will still be bad, but we avoid + * reading past the end of the buffer we were passed. + */ + len = ifp->if_length; + if (len > sizeof(struct qib_flash)) + len = sizeof(struct qib_flash); + while (len--) + csum += *ip++; + csum -= ifp->if_csum; + csum = ~csum; + if (adjust) + ifp->if_csum = csum; + + return csum; +} + +/** + * qib_get_eeprom_info- get the GUID et al. from the TSWI EEPROM device + * @dd: the qlogic_ib device + * + * We have the capability to use the nguid field, and get + * the guid from the first chip's flash, to use for all of them. + */ +void qib_get_eeprom_info(struct qib_devdata *dd) +{ + void *buf; + struct qib_flash *ifp; + __be64 guid; + int len, eep_stat; + u8 csum, *bguid; + int t = dd->unit; + struct qib_devdata *dd0 = qib_lookup(0); + + if (t && dd0->nguid > 1 && t <= dd0->nguid) { + u8 oguid; + dd->base_guid = dd0->base_guid; + bguid = (u8 *) &dd->base_guid; + + oguid = bguid[7]; + bguid[7] += t; + if (oguid > bguid[7]) { + if (bguid[6] == 0xff) { + if (bguid[5] == 0xff) { + qib_dev_err(dd, "Can't set %s GUID" + " from base, wraps to" + " OUI!\n", + qib_get_unit_name(t)); + dd->base_guid = 0; + goto bail; + } + bguid[5]++; + } + bguid[6]++; + } + dd->nguid = 1; + goto bail; + } + + /* + * Read full flash, not just currently used part, since it may have + * been written with a newer definition. + * */ + len = sizeof(struct qib_flash); + buf = vmalloc(len); + if (!buf) { + qib_dev_err(dd, "Couldn't allocate memory to read %u " + "bytes from eeprom for GUID\n", len); + goto bail; + } + + /* + * Use "public" eeprom read function, which does locking and + * figures out device. This will migrate to chip-specific. + */ + eep_stat = qib_eeprom_read(dd, 0, buf, len); + + if (eep_stat) { + qib_dev_err(dd, "Failed reading GUID from eeprom\n"); + goto done; + } + ifp = (struct qib_flash *)buf; + + csum = flash_csum(ifp, 0); + if (csum != ifp->if_csum) { + qib_devinfo(dd->pcidev, "Bad I2C flash checksum: " + "0x%x, not 0x%x\n", csum, ifp->if_csum); + goto done; + } + if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) || + *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) { + qib_dev_err(dd, "Invalid GUID %llx from flash; ignoring\n", + *(unsigned long long *) ifp->if_guid); + /* don't allow GUID if all 0 or all 1's */ + goto done; + } + + /* complain, but allow it */ + if (*(u64 *) ifp->if_guid == 0x100007511000000ULL) + qib_devinfo(dd->pcidev, "Warning, GUID %llx is " + "default, probably not correct!\n", + *(unsigned long long *) ifp->if_guid); + + bguid = ifp->if_guid; + if (!bguid[0] && !bguid[1] && !bguid[2]) { + /* + * Original incorrect GUID format in flash; fix in + * core copy, by shifting up 2 octets; don't need to + * change top octet, since both it and shifted are 0. + */ + bguid[1] = bguid[3]; + bguid[2] = bguid[4]; + bguid[3] = 0; + bguid[4] = 0; + guid = *(__be64 *) ifp->if_guid; + } else + guid = *(__be64 *) ifp->if_guid; + dd->base_guid = guid; + dd->nguid = ifp->if_numguid; + /* + * Things are slightly complicated by the desire to transparently + * support both the Pathscale 10-digit serial number and the QLogic + * 13-character version. + */ + if ((ifp->if_fversion > 1) && ifp->if_sprefix[0] && + ((u8 *) ifp->if_sprefix)[0] != 0xFF) { + char *snp = dd->serial; + + /* + * This board has a Serial-prefix, which is stored + * elsewhere for backward-compatibility. + */ + memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix); + snp[sizeof ifp->if_sprefix] = '\0'; + len = strlen(snp); + snp += len; + len = (sizeof dd->serial) - len; + if (len > sizeof ifp->if_serial) + len = sizeof ifp->if_serial; + memcpy(snp, ifp->if_serial, len); + } else + memcpy(dd->serial, ifp->if_serial, + sizeof ifp->if_serial); + if (!strstr(ifp->if_comment, "Tested successfully")) + qib_dev_err(dd, "Board SN %s did not pass functional " + "test: %s\n", dd->serial, ifp->if_comment); + + memcpy(&dd->eep_st_errs, &ifp->if_errcntp, QIB_EEP_LOG_CNT); + /* + * Power-on (actually "active") hours are kept as little-endian value + * in EEPROM, but as seconds in a (possibly as small as 24-bit) + * atomic_t while running. + */ + atomic_set(&dd->active_time, 0); + dd->eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8); + +done: + vfree(buf); + +bail:; +} + +/** + * qib_update_eeprom_log - copy active-time and error counters to eeprom + * @dd: the qlogic_ib device + * + * Although the time is kept as seconds in the qib_devdata struct, it is + * rounded to hours for re-write, as we have only 16 bits in EEPROM. + * First-cut code reads whole (expected) struct qib_flash, modifies, + * re-writes. Future direction: read/write only what we need, assuming + * that the EEPROM had to have been "good enough" for driver init, and + * if not, we aren't making it worse. + * + */ +int qib_update_eeprom_log(struct qib_devdata *dd) +{ + void *buf; + struct qib_flash *ifp; + int len, hi_water; + uint32_t new_time, new_hrs; + u8 csum; + int ret, idx; + unsigned long flags; + + /* first, check if we actually need to do anything. */ + ret = 0; + for (idx = 0; idx < QIB_EEP_LOG_CNT; ++idx) { + if (dd->eep_st_new_errs[idx]) { + ret = 1; + break; + } + } + new_time = atomic_read(&dd->active_time); + + if (ret == 0 && new_time < 3600) + goto bail; + + /* + * The quick-check above determined that there is something worthy + * of logging, so get current contents and do a more detailed idea. + * read full flash, not just currently used part, since it may have + * been written with a newer definition + */ + len = sizeof(struct qib_flash); + buf = vmalloc(len); + ret = 1; + if (!buf) { + qib_dev_err(dd, "Couldn't allocate memory to read %u " + "bytes from eeprom for logging\n", len); + goto bail; + } + + /* Grab semaphore and read current EEPROM. If we get an + * error, let go, but if not, keep it until we finish write. + */ + ret = mutex_lock_interruptible(&dd->eep_lock); + if (ret) { + qib_dev_err(dd, "Unable to acquire EEPROM for logging\n"); + goto free_bail; + } + ret = qib_twsi_blk_rd(dd, dd->twsi_eeprom_dev, 0, buf, len); + if (ret) { + mutex_unlock(&dd->eep_lock); + qib_dev_err(dd, "Unable read EEPROM for logging\n"); + goto free_bail; + } + ifp = (struct qib_flash *)buf; + + csum = flash_csum(ifp, 0); + if (csum != ifp->if_csum) { + mutex_unlock(&dd->eep_lock); + qib_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n", + csum, ifp->if_csum); + ret = 1; + goto free_bail; + } + hi_water = 0; + spin_lock_irqsave(&dd->eep_st_lock, flags); + for (idx = 0; idx < QIB_EEP_LOG_CNT; ++idx) { + int new_val = dd->eep_st_new_errs[idx]; + if (new_val) { + /* + * If we have seen any errors, add to EEPROM values + * We need to saturate at 0xFF (255) and we also + * would need to adjust the checksum if we were + * trying to minimize EEPROM traffic + * Note that we add to actual current count in EEPROM, + * in case it was altered while we were running. + */ + new_val += ifp->if_errcntp[idx]; + if (new_val > 0xFF) + new_val = 0xFF; + if (ifp->if_errcntp[idx] != new_val) { + ifp->if_errcntp[idx] = new_val; + hi_water = offsetof(struct qib_flash, + if_errcntp) + idx; + } + /* + * update our shadow (used to minimize EEPROM + * traffic), to match what we are about to write. + */ + dd->eep_st_errs[idx] = new_val; + dd->eep_st_new_errs[idx] = 0; + } + } + /* + * Now update active-time. We would like to round to the nearest hour + * but unless atomic_t are sure to be proper signed ints we cannot, + * because we need to account for what we "transfer" to EEPROM and + * if we log an hour at 31 minutes, then we would need to set + * active_time to -29 to accurately count the _next_ hour. + */ + if (new_time >= 3600) { + new_hrs = new_time / 3600; + atomic_sub((new_hrs * 3600), &dd->active_time); + new_hrs += dd->eep_hrs; + if (new_hrs > 0xFFFF) + new_hrs = 0xFFFF; + dd->eep_hrs = new_hrs; + if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) { + ifp->if_powerhour[0] = new_hrs & 0xFF; + hi_water = offsetof(struct qib_flash, if_powerhour); + } + if ((new_hrs >> 8) != ifp->if_powerhour[1]) { + ifp->if_powerhour[1] = new_hrs >> 8; + hi_water = offsetof(struct qib_flash, if_powerhour) + 1; + } + } + /* + * There is a tiny possibility that we could somehow fail to write + * the EEPROM after updating our shadows, but problems from holding + * the spinlock too long are a much bigger issue. + */ + spin_unlock_irqrestore(&dd->eep_st_lock, flags); + if (hi_water) { + /* we made some change to the data, uopdate cksum and write */ + csum = flash_csum(ifp, 1); + ret = eeprom_write_with_enable(dd, 0, buf, hi_water + 1); + } + mutex_unlock(&dd->eep_lock); + if (ret) + qib_dev_err(dd, "Failed updating EEPROM\n"); + +free_bail: + vfree(buf); +bail: + return ret; +} + +/** + * qib_inc_eeprom_err - increment one of the four error counters + * that are logged to EEPROM. + * @dd: the qlogic_ib device + * @eidx: 0..3, the counter to increment + * @incr: how much to add + * + * Each counter is 8-bits, and saturates at 255 (0xFF). They + * are copied to the EEPROM (aka flash) whenever qib_update_eeprom_log() + * is called, but it can only be called in a context that allows sleep. + * This function can be called even at interrupt level. + */ +void qib_inc_eeprom_err(struct qib_devdata *dd, u32 eidx, u32 incr) +{ + uint new_val; + unsigned long flags; + + spin_lock_irqsave(&dd->eep_st_lock, flags); + new_val = dd->eep_st_new_errs[eidx] + incr; + if (new_val > 255) + new_val = 255; + dd->eep_st_new_errs[eidx] = new_val; + spin_unlock_irqrestore(&dd->eep_st_lock, flags); +} diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c new file mode 100644 index 00000000..a7403248 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -0,0 +1,2328 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "qib.h" +#include "qib_common.h" +#include "qib_user_sdma.h" + +static int qib_open(struct inode *, struct file *); +static int qib_close(struct inode *, struct file *); +static ssize_t qib_write(struct file *, const char __user *, size_t, loff_t *); +static ssize_t qib_aio_write(struct kiocb *, const struct iovec *, + unsigned long, loff_t); +static unsigned int qib_poll(struct file *, struct poll_table_struct *); +static int qib_mmapf(struct file *, struct vm_area_struct *); + +static const struct file_operations qib_file_ops = { + .owner = THIS_MODULE, + .write = qib_write, + .aio_write = qib_aio_write, + .open = qib_open, + .release = qib_close, + .poll = qib_poll, + .mmap = qib_mmapf, + .llseek = noop_llseek, +}; + +/* + * Convert kernel virtual addresses to physical addresses so they don't + * potentially conflict with the chip addresses used as mmap offsets. + * It doesn't really matter what mmap offset we use as long as we can + * interpret it correctly. + */ +static u64 cvt_kvaddr(void *p) +{ + struct page *page; + u64 paddr = 0; + + page = vmalloc_to_page(p); + if (page) + paddr = page_to_pfn(page) << PAGE_SHIFT; + + return paddr; +} + +static int qib_get_base_info(struct file *fp, void __user *ubase, + size_t ubase_size) +{ + struct qib_ctxtdata *rcd = ctxt_fp(fp); + int ret = 0; + struct qib_base_info *kinfo = NULL; + struct qib_devdata *dd = rcd->dd; + struct qib_pportdata *ppd = rcd->ppd; + unsigned subctxt_cnt; + int shared, master; + size_t sz; + + subctxt_cnt = rcd->subctxt_cnt; + if (!subctxt_cnt) { + shared = 0; + master = 0; + subctxt_cnt = 1; + } else { + shared = 1; + master = !subctxt_fp(fp); + } + + sz = sizeof(*kinfo); + /* If context sharing is not requested, allow the old size structure */ + if (!shared) + sz -= 7 * sizeof(u64); + if (ubase_size < sz) { + ret = -EINVAL; + goto bail; + } + + kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL); + if (kinfo == NULL) { + ret = -ENOMEM; + goto bail; + } + + ret = dd->f_get_base_info(rcd, kinfo); + if (ret < 0) + goto bail; + + kinfo->spi_rcvhdr_cnt = dd->rcvhdrcnt; + kinfo->spi_rcvhdrent_size = dd->rcvhdrentsize; + kinfo->spi_tidegrcnt = rcd->rcvegrcnt; + kinfo->spi_rcv_egrbufsize = dd->rcvegrbufsize; + /* + * have to mmap whole thing + */ + kinfo->spi_rcv_egrbuftotlen = + rcd->rcvegrbuf_chunks * rcd->rcvegrbuf_size; + kinfo->spi_rcv_egrperchunk = rcd->rcvegrbufs_perchunk; + kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen / + rcd->rcvegrbuf_chunks; + kinfo->spi_tidcnt = dd->rcvtidcnt / subctxt_cnt; + if (master) + kinfo->spi_tidcnt += dd->rcvtidcnt % subctxt_cnt; + /* + * for this use, may be cfgctxts summed over all chips that + * are are configured and present + */ + kinfo->spi_nctxts = dd->cfgctxts; + /* unit (chip/board) our context is on */ + kinfo->spi_unit = dd->unit; + kinfo->spi_port = ppd->port; + /* for now, only a single page */ + kinfo->spi_tid_maxsize = PAGE_SIZE; + + /* + * Doing this per context, and based on the skip value, etc. This has + * to be the actual buffer size, since the protocol code treats it + * as an array. + * + * These have to be set to user addresses in the user code via mmap. + * These values are used on return to user code for the mmap target + * addresses only. For 32 bit, same 44 bit address problem, so use + * the physical address, not virtual. Before 2.6.11, using the + * page_address() macro worked, but in 2.6.11, even that returns the + * full 64 bit address (upper bits all 1's). So far, using the + * physical addresses (or chip offsets, for chip mapping) works, but + * no doubt some future kernel release will change that, and we'll be + * on to yet another method of dealing with this. + * Normally only one of rcvhdr_tailaddr or rhf_offset is useful + * since the chips with non-zero rhf_offset don't normally + * enable tail register updates to host memory, but for testing, + * both can be enabled and used. + */ + kinfo->spi_rcvhdr_base = (u64) rcd->rcvhdrq_phys; + kinfo->spi_rcvhdr_tailaddr = (u64) rcd->rcvhdrqtailaddr_phys; + kinfo->spi_rhf_offset = dd->rhf_offset; + kinfo->spi_rcv_egrbufs = (u64) rcd->rcvegr_phys; + kinfo->spi_pioavailaddr = (u64) dd->pioavailregs_phys; + /* setup per-unit (not port) status area for user programs */ + kinfo->spi_status = (u64) kinfo->spi_pioavailaddr + + (char *) ppd->statusp - + (char *) dd->pioavailregs_dma; + kinfo->spi_uregbase = (u64) dd->uregbase + dd->ureg_align * rcd->ctxt; + if (!shared) { + kinfo->spi_piocnt = rcd->piocnt; + kinfo->spi_piobufbase = (u64) rcd->piobufs; + kinfo->spi_sendbuf_status = cvt_kvaddr(rcd->user_event_mask); + } else if (master) { + kinfo->spi_piocnt = (rcd->piocnt / subctxt_cnt) + + (rcd->piocnt % subctxt_cnt); + /* Master's PIO buffers are after all the slave's */ + kinfo->spi_piobufbase = (u64) rcd->piobufs + + dd->palign * + (rcd->piocnt - kinfo->spi_piocnt); + } else { + unsigned slave = subctxt_fp(fp) - 1; + + kinfo->spi_piocnt = rcd->piocnt / subctxt_cnt; + kinfo->spi_piobufbase = (u64) rcd->piobufs + + dd->palign * kinfo->spi_piocnt * slave; + } + + if (shared) { + kinfo->spi_sendbuf_status = + cvt_kvaddr(&rcd->user_event_mask[subctxt_fp(fp)]); + /* only spi_subctxt_* fields should be set in this block! */ + kinfo->spi_subctxt_uregbase = cvt_kvaddr(rcd->subctxt_uregbase); + + kinfo->spi_subctxt_rcvegrbuf = + cvt_kvaddr(rcd->subctxt_rcvegrbuf); + kinfo->spi_subctxt_rcvhdr_base = + cvt_kvaddr(rcd->subctxt_rcvhdr_base); + } + + /* + * All user buffers are 2KB buffers. If we ever support + * giving 4KB buffers to user processes, this will need some + * work. Can't use piobufbase directly, because it has + * both 2K and 4K buffer base values. + */ + kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->pio2k_bufbase) / + dd->palign; + kinfo->spi_pioalign = dd->palign; + kinfo->spi_qpair = QIB_KD_QP; + /* + * user mode PIO buffers are always 2KB, even when 4KB can + * be received, and sent via the kernel; this is ibmaxlen + * for 2K MTU. + */ + kinfo->spi_piosize = dd->piosize2k - 2 * sizeof(u32); + kinfo->spi_mtu = ppd->ibmaxlen; /* maxlen, not ibmtu */ + kinfo->spi_ctxt = rcd->ctxt; + kinfo->spi_subctxt = subctxt_fp(fp); + kinfo->spi_sw_version = QIB_KERN_SWVERSION; + kinfo->spi_sw_version |= 1U << 31; /* QLogic-built, not kernel.org */ + kinfo->spi_hw_version = dd->revision; + + if (master) + kinfo->spi_runtime_flags |= QIB_RUNTIME_MASTER; + + sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo); + if (copy_to_user(ubase, kinfo, sz)) + ret = -EFAULT; +bail: + kfree(kinfo); + return ret; +} + +/** + * qib_tid_update - update a context TID + * @rcd: the context + * @fp: the qib device file + * @ti: the TID information + * + * The new implementation as of Oct 2004 is that the driver assigns + * the tid and returns it to the caller. To reduce search time, we + * keep a cursor for each context, walking the shadow tid array to find + * one that's not in use. + * + * For now, if we can't allocate the full list, we fail, although + * in the long run, we'll allocate as many as we can, and the + * caller will deal with that by trying the remaining pages later. + * That means that when we fail, we have to mark the tids as not in + * use again, in our shadow copy. + * + * It's up to the caller to free the tids when they are done. + * We'll unlock the pages as they free them. + * + * Also, right now we are locking one page at a time, but since + * the intended use of this routine is for a single group of + * virtually contiguous pages, that should change to improve + * performance. + */ +static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp, + const struct qib_tid_info *ti) +{ + int ret = 0, ntids; + u32 tid, ctxttid, cnt, i, tidcnt, tidoff; + u16 *tidlist; + struct qib_devdata *dd = rcd->dd; + u64 physaddr; + unsigned long vaddr; + u64 __iomem *tidbase; + unsigned long tidmap[8]; + struct page **pagep = NULL; + unsigned subctxt = subctxt_fp(fp); + + if (!dd->pageshadow) { + ret = -ENOMEM; + goto done; + } + + cnt = ti->tidcnt; + if (!cnt) { + ret = -EFAULT; + goto done; + } + ctxttid = rcd->ctxt * dd->rcvtidcnt; + if (!rcd->subctxt_cnt) { + tidcnt = dd->rcvtidcnt; + tid = rcd->tidcursor; + tidoff = 0; + } else if (!subctxt) { + tidcnt = (dd->rcvtidcnt / rcd->subctxt_cnt) + + (dd->rcvtidcnt % rcd->subctxt_cnt); + tidoff = dd->rcvtidcnt - tidcnt; + ctxttid += tidoff; + tid = tidcursor_fp(fp); + } else { + tidcnt = dd->rcvtidcnt / rcd->subctxt_cnt; + tidoff = tidcnt * (subctxt - 1); + ctxttid += tidoff; + tid = tidcursor_fp(fp); + } + if (cnt > tidcnt) { + /* make sure it all fits in tid_pg_list */ + qib_devinfo(dd->pcidev, "Process tried to allocate %u " + "TIDs, only trying max (%u)\n", cnt, tidcnt); + cnt = tidcnt; + } + pagep = (struct page **) rcd->tid_pg_list; + tidlist = (u16 *) &pagep[dd->rcvtidcnt]; + pagep += tidoff; + tidlist += tidoff; + + memset(tidmap, 0, sizeof(tidmap)); + /* before decrement; chip actual # */ + ntids = tidcnt; + tidbase = (u64 __iomem *) (((char __iomem *) dd->kregbase) + + dd->rcvtidbase + + ctxttid * sizeof(*tidbase)); + + /* virtual address of first page in transfer */ + vaddr = ti->tidvaddr; + if (!access_ok(VERIFY_WRITE, (void __user *) vaddr, + cnt * PAGE_SIZE)) { + ret = -EFAULT; + goto done; + } + ret = qib_get_user_pages(vaddr, cnt, pagep); + if (ret) { + /* + * if (ret == -EBUSY) + * We can't continue because the pagep array won't be + * initialized. This should never happen, + * unless perhaps the user has mpin'ed the pages + * themselves. + */ + qib_devinfo(dd->pcidev, + "Failed to lock addr %p, %u pages: " + "errno %d\n", (void *) vaddr, cnt, -ret); + goto done; + } + for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) { + for (; ntids--; tid++) { + if (tid == tidcnt) + tid = 0; + if (!dd->pageshadow[ctxttid + tid]) + break; + } + if (ntids < 0) { + /* + * Oops, wrapped all the way through their TIDs, + * and didn't have enough free; see comments at + * start of routine + */ + i--; /* last tidlist[i] not filled in */ + ret = -ENOMEM; + break; + } + tidlist[i] = tid + tidoff; + /* we "know" system pages and TID pages are same size */ + dd->pageshadow[ctxttid + tid] = pagep[i]; + dd->physshadow[ctxttid + tid] = + qib_map_page(dd->pcidev, pagep[i], 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + /* + * don't need atomic or it's overhead + */ + __set_bit(tid, tidmap); + physaddr = dd->physshadow[ctxttid + tid]; + /* PERFORMANCE: below should almost certainly be cached */ + dd->f_put_tid(dd, &tidbase[tid], + RCVHQ_RCV_TYPE_EXPECTED, physaddr); + /* + * don't check this tid in qib_ctxtshadow, since we + * just filled it in; start with the next one. + */ + tid++; + } + + if (ret) { + u32 limit; +cleanup: + /* jump here if copy out of updated info failed... */ + /* same code that's in qib_free_tid() */ + limit = sizeof(tidmap) * BITS_PER_BYTE; + if (limit > tidcnt) + /* just in case size changes in future */ + limit = tidcnt; + tid = find_first_bit((const unsigned long *)tidmap, limit); + for (; tid < limit; tid++) { + if (!test_bit(tid, tidmap)) + continue; + if (dd->pageshadow[ctxttid + tid]) { + dma_addr_t phys; + + phys = dd->physshadow[ctxttid + tid]; + dd->physshadow[ctxttid + tid] = dd->tidinvalid; + /* PERFORMANCE: below should almost certainly + * be cached + */ + dd->f_put_tid(dd, &tidbase[tid], + RCVHQ_RCV_TYPE_EXPECTED, + dd->tidinvalid); + pci_unmap_page(dd->pcidev, phys, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + dd->pageshadow[ctxttid + tid] = NULL; + } + } + qib_release_user_pages(pagep, cnt); + } else { + /* + * Copy the updated array, with qib_tid's filled in, back + * to user. Since we did the copy in already, this "should + * never fail" If it does, we have to clean up... + */ + if (copy_to_user((void __user *) + (unsigned long) ti->tidlist, + tidlist, cnt * sizeof(*tidlist))) { + ret = -EFAULT; + goto cleanup; + } + if (copy_to_user((void __user *) (unsigned long) ti->tidmap, + tidmap, sizeof tidmap)) { + ret = -EFAULT; + goto cleanup; + } + if (tid == tidcnt) + tid = 0; + if (!rcd->subctxt_cnt) + rcd->tidcursor = tid; + else + tidcursor_fp(fp) = tid; + } + +done: + return ret; +} + +/** + * qib_tid_free - free a context TID + * @rcd: the context + * @subctxt: the subcontext + * @ti: the TID info + * + * right now we are unlocking one page at a time, but since + * the intended use of this routine is for a single group of + * virtually contiguous pages, that should change to improve + * performance. We check that the TID is in range for this context + * but otherwise don't check validity; if user has an error and + * frees the wrong tid, it's only their own data that can thereby + * be corrupted. We do check that the TID was in use, for sanity + * We always use our idea of the saved address, not the address that + * they pass in to us. + */ +static int qib_tid_free(struct qib_ctxtdata *rcd, unsigned subctxt, + const struct qib_tid_info *ti) +{ + int ret = 0; + u32 tid, ctxttid, cnt, limit, tidcnt; + struct qib_devdata *dd = rcd->dd; + u64 __iomem *tidbase; + unsigned long tidmap[8]; + + if (!dd->pageshadow) { + ret = -ENOMEM; + goto done; + } + + if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap, + sizeof tidmap)) { + ret = -EFAULT; + goto done; + } + + ctxttid = rcd->ctxt * dd->rcvtidcnt; + if (!rcd->subctxt_cnt) + tidcnt = dd->rcvtidcnt; + else if (!subctxt) { + tidcnt = (dd->rcvtidcnt / rcd->subctxt_cnt) + + (dd->rcvtidcnt % rcd->subctxt_cnt); + ctxttid += dd->rcvtidcnt - tidcnt; + } else { + tidcnt = dd->rcvtidcnt / rcd->subctxt_cnt; + ctxttid += tidcnt * (subctxt - 1); + } + tidbase = (u64 __iomem *) ((char __iomem *)(dd->kregbase) + + dd->rcvtidbase + + ctxttid * sizeof(*tidbase)); + + limit = sizeof(tidmap) * BITS_PER_BYTE; + if (limit > tidcnt) + /* just in case size changes in future */ + limit = tidcnt; + tid = find_first_bit(tidmap, limit); + for (cnt = 0; tid < limit; tid++) { + /* + * small optimization; if we detect a run of 3 or so without + * any set, use find_first_bit again. That's mainly to + * accelerate the case where we wrapped, so we have some at + * the beginning, and some at the end, and a big gap + * in the middle. + */ + if (!test_bit(tid, tidmap)) + continue; + cnt++; + if (dd->pageshadow[ctxttid + tid]) { + struct page *p; + dma_addr_t phys; + + p = dd->pageshadow[ctxttid + tid]; + dd->pageshadow[ctxttid + tid] = NULL; + phys = dd->physshadow[ctxttid + tid]; + dd->physshadow[ctxttid + tid] = dd->tidinvalid; + /* PERFORMANCE: below should almost certainly be + * cached + */ + dd->f_put_tid(dd, &tidbase[tid], + RCVHQ_RCV_TYPE_EXPECTED, dd->tidinvalid); + pci_unmap_page(dd->pcidev, phys, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + qib_release_user_pages(&p, 1); + } + } +done: + return ret; +} + +/** + * qib_set_part_key - set a partition key + * @rcd: the context + * @key: the key + * + * We can have up to 4 active at a time (other than the default, which is + * always allowed). This is somewhat tricky, since multiple contexts may set + * the same key, so we reference count them, and clean up at exit. All 4 + * partition keys are packed into a single qlogic_ib register. It's an + * error for a process to set the same pkey multiple times. We provide no + * mechanism to de-allocate a pkey at this time, we may eventually need to + * do that. I've used the atomic operations, and no locking, and only make + * a single pass through what's available. This should be more than + * adequate for some time. I'll think about spinlocks or the like if and as + * it's necessary. + */ +static int qib_set_part_key(struct qib_ctxtdata *rcd, u16 key) +{ + struct qib_pportdata *ppd = rcd->ppd; + int i, any = 0, pidx = -1; + u16 lkey = key & 0x7FFF; + int ret; + + if (lkey == (QIB_DEFAULT_P_KEY & 0x7FFF)) { + /* nothing to do; this key always valid */ + ret = 0; + goto bail; + } + + if (!lkey) { + ret = -EINVAL; + goto bail; + } + + /* + * Set the full membership bit, because it has to be + * set in the register or the packet, and it seems + * cleaner to set in the register than to force all + * callers to set it. + */ + key |= 0x8000; + + for (i = 0; i < ARRAY_SIZE(rcd->pkeys); i++) { + if (!rcd->pkeys[i] && pidx == -1) + pidx = i; + if (rcd->pkeys[i] == key) { + ret = -EEXIST; + goto bail; + } + } + if (pidx == -1) { + ret = -EBUSY; + goto bail; + } + for (any = i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) { + if (!ppd->pkeys[i]) { + any++; + continue; + } + if (ppd->pkeys[i] == key) { + atomic_t *pkrefs = &ppd->pkeyrefs[i]; + + if (atomic_inc_return(pkrefs) > 1) { + rcd->pkeys[pidx] = key; + ret = 0; + goto bail; + } else { + /* + * lost race, decrement count, catch below + */ + atomic_dec(pkrefs); + any++; + } + } + if ((ppd->pkeys[i] & 0x7FFF) == lkey) { + /* + * It makes no sense to have both the limited and + * full membership PKEY set at the same time since + * the unlimited one will disable the limited one. + */ + ret = -EEXIST; + goto bail; + } + } + if (!any) { + ret = -EBUSY; + goto bail; + } + for (any = i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) { + if (!ppd->pkeys[i] && + atomic_inc_return(&ppd->pkeyrefs[i]) == 1) { + rcd->pkeys[pidx] = key; + ppd->pkeys[i] = key; + (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0); + ret = 0; + goto bail; + } + } + ret = -EBUSY; + +bail: + return ret; +} + +/** + * qib_manage_rcvq - manage a context's receive queue + * @rcd: the context + * @subctxt: the subcontext + * @start_stop: action to carry out + * + * start_stop == 0 disables receive on the context, for use in queue + * overflow conditions. start_stop==1 re-enables, to be used to + * re-init the software copy of the head register + */ +static int qib_manage_rcvq(struct qib_ctxtdata *rcd, unsigned subctxt, + int start_stop) +{ + struct qib_devdata *dd = rcd->dd; + unsigned int rcvctrl_op; + + if (subctxt) + goto bail; + /* atomically clear receive enable ctxt. */ + if (start_stop) { + /* + * On enable, force in-memory copy of the tail register to + * 0, so that protocol code doesn't have to worry about + * whether or not the chip has yet updated the in-memory + * copy or not on return from the system call. The chip + * always resets it's tail register back to 0 on a + * transition from disabled to enabled. + */ + if (rcd->rcvhdrtail_kvaddr) + qib_clear_rcvhdrtail(rcd); + rcvctrl_op = QIB_RCVCTRL_CTXT_ENB; + } else + rcvctrl_op = QIB_RCVCTRL_CTXT_DIS; + dd->f_rcvctrl(rcd->ppd, rcvctrl_op, rcd->ctxt); + /* always; new head should be equal to new tail; see above */ +bail: + return 0; +} + +static void qib_clean_part_key(struct qib_ctxtdata *rcd, + struct qib_devdata *dd) +{ + int i, j, pchanged = 0; + u64 oldpkey; + struct qib_pportdata *ppd = rcd->ppd; + + /* for debugging only */ + oldpkey = (u64) ppd->pkeys[0] | + ((u64) ppd->pkeys[1] << 16) | + ((u64) ppd->pkeys[2] << 32) | + ((u64) ppd->pkeys[3] << 48); + + for (i = 0; i < ARRAY_SIZE(rcd->pkeys); i++) { + if (!rcd->pkeys[i]) + continue; + for (j = 0; j < ARRAY_SIZE(ppd->pkeys); j++) { + /* check for match independent of the global bit */ + if ((ppd->pkeys[j] & 0x7fff) != + (rcd->pkeys[i] & 0x7fff)) + continue; + if (atomic_dec_and_test(&ppd->pkeyrefs[j])) { + ppd->pkeys[j] = 0; + pchanged++; + } + break; + } + rcd->pkeys[i] = 0; + } + if (pchanged) + (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0); +} + +/* common code for the mappings on dma_alloc_coherent mem */ +static int qib_mmap_mem(struct vm_area_struct *vma, struct qib_ctxtdata *rcd, + unsigned len, void *kvaddr, u32 write_ok, char *what) +{ + struct qib_devdata *dd = rcd->dd; + unsigned long pfn; + int ret; + + if ((vma->vm_end - vma->vm_start) > len) { + qib_devinfo(dd->pcidev, + "FAIL on %s: len %lx > %x\n", what, + vma->vm_end - vma->vm_start, len); + ret = -EFAULT; + goto bail; + } + + /* + * shared context user code requires rcvhdrq mapped r/w, others + * only allowed readonly mapping. + */ + if (!write_ok) { + if (vma->vm_flags & VM_WRITE) { + qib_devinfo(dd->pcidev, + "%s must be mapped readonly\n", what); + ret = -EPERM; + goto bail; + } + + /* don't allow them to later change with mprotect */ + vma->vm_flags &= ~VM_MAYWRITE; + } + + pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT; + ret = remap_pfn_range(vma, vma->vm_start, pfn, + len, vma->vm_page_prot); + if (ret) + qib_devinfo(dd->pcidev, "%s ctxt%u mmap of %lx, %x " + "bytes failed: %d\n", what, rcd->ctxt, + pfn, len, ret); +bail: + return ret; +} + +static int mmap_ureg(struct vm_area_struct *vma, struct qib_devdata *dd, + u64 ureg) +{ + unsigned long phys; + unsigned long sz; + int ret; + + /* + * This is real hardware, so use io_remap. This is the mechanism + * for the user process to update the head registers for their ctxt + * in the chip. + */ + sz = dd->flags & QIB_HAS_HDRSUPP ? 2 * PAGE_SIZE : PAGE_SIZE; + if ((vma->vm_end - vma->vm_start) > sz) { + qib_devinfo(dd->pcidev, "FAIL mmap userreg: reqlen " + "%lx > PAGE\n", vma->vm_end - vma->vm_start); + ret = -EFAULT; + } else { + phys = dd->physaddr + ureg; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; + ret = io_remap_pfn_range(vma, vma->vm_start, + phys >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); + } + return ret; +} + +static int mmap_piobufs(struct vm_area_struct *vma, + struct qib_devdata *dd, + struct qib_ctxtdata *rcd, + unsigned piobufs, unsigned piocnt) +{ + unsigned long phys; + int ret; + + /* + * When we map the PIO buffers in the chip, we want to map them as + * writeonly, no read possible; unfortunately, x86 doesn't allow + * for this in hardware, but we still prevent users from asking + * for it. + */ + if ((vma->vm_end - vma->vm_start) > (piocnt * dd->palign)) { + qib_devinfo(dd->pcidev, "FAIL mmap piobufs: " + "reqlen %lx > PAGE\n", + vma->vm_end - vma->vm_start); + ret = -EINVAL; + goto bail; + } + + phys = dd->physaddr + piobufs; + +#if defined(__powerpc__) + /* There isn't a generic way to specify writethrough mappings */ + pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; + pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU; + pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED; +#endif + + /* + * don't allow them to later change to readable with mprotect (for when + * not initially mapped readable, as is normally the case) + */ + vma->vm_flags &= ~VM_MAYREAD; + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; + + if (qib_wc_pat) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + + ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +bail: + return ret; +} + +static int mmap_rcvegrbufs(struct vm_area_struct *vma, + struct qib_ctxtdata *rcd) +{ + struct qib_devdata *dd = rcd->dd; + unsigned long start, size; + size_t total_size, i; + unsigned long pfn; + int ret; + + size = rcd->rcvegrbuf_size; + total_size = rcd->rcvegrbuf_chunks * size; + if ((vma->vm_end - vma->vm_start) > total_size) { + qib_devinfo(dd->pcidev, "FAIL on egr bufs: " + "reqlen %lx > actual %lx\n", + vma->vm_end - vma->vm_start, + (unsigned long) total_size); + ret = -EINVAL; + goto bail; + } + + if (vma->vm_flags & VM_WRITE) { + qib_devinfo(dd->pcidev, "Can't map eager buffers as " + "writable (flags=%lx)\n", vma->vm_flags); + ret = -EPERM; + goto bail; + } + /* don't allow them to later change to writeable with mprotect */ + vma->vm_flags &= ~VM_MAYWRITE; + + start = vma->vm_start; + + for (i = 0; i < rcd->rcvegrbuf_chunks; i++, start += size) { + pfn = virt_to_phys(rcd->rcvegrbuf[i]) >> PAGE_SHIFT; + ret = remap_pfn_range(vma, start, pfn, size, + vma->vm_page_prot); + if (ret < 0) + goto bail; + } + ret = 0; + +bail: + return ret; +} + +/* + * qib_file_vma_fault - handle a VMA page fault. + */ +static int qib_file_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *page; + + page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT)); + if (!page) + return VM_FAULT_SIGBUS; + + get_page(page); + vmf->page = page; + + return 0; +} + +static struct vm_operations_struct qib_file_vm_ops = { + .fault = qib_file_vma_fault, +}; + +static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, + struct qib_ctxtdata *rcd, unsigned subctxt) +{ + struct qib_devdata *dd = rcd->dd; + unsigned subctxt_cnt; + unsigned long len; + void *addr; + size_t size; + int ret = 0; + + subctxt_cnt = rcd->subctxt_cnt; + size = rcd->rcvegrbuf_chunks * rcd->rcvegrbuf_size; + + /* + * Each process has all the subctxt uregbase, rcvhdrq, and + * rcvegrbufs mmapped - as an array for all the processes, + * and also separately for this process. + */ + if (pgaddr == cvt_kvaddr(rcd->subctxt_uregbase)) { + addr = rcd->subctxt_uregbase; + size = PAGE_SIZE * subctxt_cnt; + } else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvhdr_base)) { + addr = rcd->subctxt_rcvhdr_base; + size = rcd->rcvhdrq_size * subctxt_cnt; + } else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvegrbuf)) { + addr = rcd->subctxt_rcvegrbuf; + size *= subctxt_cnt; + } else if (pgaddr == cvt_kvaddr(rcd->subctxt_uregbase + + PAGE_SIZE * subctxt)) { + addr = rcd->subctxt_uregbase + PAGE_SIZE * subctxt; + size = PAGE_SIZE; + } else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvhdr_base + + rcd->rcvhdrq_size * subctxt)) { + addr = rcd->subctxt_rcvhdr_base + + rcd->rcvhdrq_size * subctxt; + size = rcd->rcvhdrq_size; + } else if (pgaddr == cvt_kvaddr(&rcd->user_event_mask[subctxt])) { + addr = rcd->user_event_mask; + size = PAGE_SIZE; + } else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvegrbuf + + size * subctxt)) { + addr = rcd->subctxt_rcvegrbuf + size * subctxt; + /* rcvegrbufs are read-only on the slave */ + if (vma->vm_flags & VM_WRITE) { + qib_devinfo(dd->pcidev, + "Can't map eager buffers as " + "writable (flags=%lx)\n", vma->vm_flags); + ret = -EPERM; + goto bail; + } + /* + * Don't allow permission to later change to writeable + * with mprotect. + */ + vma->vm_flags &= ~VM_MAYWRITE; + } else + goto bail; + len = vma->vm_end - vma->vm_start; + if (len > size) { + ret = -EINVAL; + goto bail; + } + + vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; + vma->vm_ops = &qib_file_vm_ops; + vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + ret = 1; + +bail: + return ret; +} + +/** + * qib_mmapf - mmap various structures into user space + * @fp: the file pointer + * @vma: the VM area + * + * We use this to have a shared buffer between the kernel and the user code + * for the rcvhdr queue, egr buffers, and the per-context user regs and pio + * buffers in the chip. We have the open and close entries so we can bump + * the ref count and keep the driver from being unloaded while still mapped. + */ +static int qib_mmapf(struct file *fp, struct vm_area_struct *vma) +{ + struct qib_ctxtdata *rcd; + struct qib_devdata *dd; + u64 pgaddr, ureg; + unsigned piobufs, piocnt; + int ret, match = 1; + + rcd = ctxt_fp(fp); + if (!rcd || !(vma->vm_flags & VM_SHARED)) { + ret = -EINVAL; + goto bail; + } + dd = rcd->dd; + + /* + * This is the qib_do_user_init() code, mapping the shared buffers + * and per-context user registers into the user process. The address + * referred to by vm_pgoff is the file offset passed via mmap(). + * For shared contexts, this is the kernel vmalloc() address of the + * pages to share with the master. + * For non-shared or master ctxts, this is a physical address. + * We only do one mmap for each space mapped. + */ + pgaddr = vma->vm_pgoff << PAGE_SHIFT; + + /* + * Check for 0 in case one of the allocations failed, but user + * called mmap anyway. + */ + if (!pgaddr) { + ret = -EINVAL; + goto bail; + } + + /* + * Physical addresses must fit in 40 bits for our hardware. + * Check for kernel virtual addresses first, anything else must + * match a HW or memory address. + */ + ret = mmap_kvaddr(vma, pgaddr, rcd, subctxt_fp(fp)); + if (ret) { + if (ret > 0) + ret = 0; + goto bail; + } + + ureg = dd->uregbase + dd->ureg_align * rcd->ctxt; + if (!rcd->subctxt_cnt) { + /* ctxt is not shared */ + piocnt = rcd->piocnt; + piobufs = rcd->piobufs; + } else if (!subctxt_fp(fp)) { + /* caller is the master */ + piocnt = (rcd->piocnt / rcd->subctxt_cnt) + + (rcd->piocnt % rcd->subctxt_cnt); + piobufs = rcd->piobufs + + dd->palign * (rcd->piocnt - piocnt); + } else { + unsigned slave = subctxt_fp(fp) - 1; + + /* caller is a slave */ + piocnt = rcd->piocnt / rcd->subctxt_cnt; + piobufs = rcd->piobufs + dd->palign * piocnt * slave; + } + + if (pgaddr == ureg) + ret = mmap_ureg(vma, dd, ureg); + else if (pgaddr == piobufs) + ret = mmap_piobufs(vma, dd, rcd, piobufs, piocnt); + else if (pgaddr == dd->pioavailregs_phys) + /* in-memory copy of pioavail registers */ + ret = qib_mmap_mem(vma, rcd, PAGE_SIZE, + (void *) dd->pioavailregs_dma, 0, + "pioavail registers"); + else if (pgaddr == rcd->rcvegr_phys) + ret = mmap_rcvegrbufs(vma, rcd); + else if (pgaddr == (u64) rcd->rcvhdrq_phys) + /* + * The rcvhdrq itself; multiple pages, contiguous + * from an i/o perspective. Shared contexts need + * to map r/w, so we allow writing. + */ + ret = qib_mmap_mem(vma, rcd, rcd->rcvhdrq_size, + rcd->rcvhdrq, 1, "rcvhdrq"); + else if (pgaddr == (u64) rcd->rcvhdrqtailaddr_phys) + /* in-memory copy of rcvhdrq tail register */ + ret = qib_mmap_mem(vma, rcd, PAGE_SIZE, + rcd->rcvhdrtail_kvaddr, 0, + "rcvhdrq tail"); + else + match = 0; + if (!match) + ret = -EINVAL; + + vma->vm_private_data = NULL; + + if (ret < 0) + qib_devinfo(dd->pcidev, + "mmap Failure %d: off %llx len %lx\n", + -ret, (unsigned long long)pgaddr, + vma->vm_end - vma->vm_start); +bail: + return ret; +} + +static unsigned int qib_poll_urgent(struct qib_ctxtdata *rcd, + struct file *fp, + struct poll_table_struct *pt) +{ + struct qib_devdata *dd = rcd->dd; + unsigned pollflag; + + poll_wait(fp, &rcd->wait, pt); + + spin_lock_irq(&dd->uctxt_lock); + if (rcd->urgent != rcd->urgent_poll) { + pollflag = POLLIN | POLLRDNORM; + rcd->urgent_poll = rcd->urgent; + } else { + pollflag = 0; + set_bit(QIB_CTXT_WAITING_URG, &rcd->flag); + } + spin_unlock_irq(&dd->uctxt_lock); + + return pollflag; +} + +static unsigned int qib_poll_next(struct qib_ctxtdata *rcd, + struct file *fp, + struct poll_table_struct *pt) +{ + struct qib_devdata *dd = rcd->dd; + unsigned pollflag; + + poll_wait(fp, &rcd->wait, pt); + + spin_lock_irq(&dd->uctxt_lock); + if (dd->f_hdrqempty(rcd)) { + set_bit(QIB_CTXT_WAITING_RCV, &rcd->flag); + dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_INTRAVAIL_ENB, rcd->ctxt); + pollflag = 0; + } else + pollflag = POLLIN | POLLRDNORM; + spin_unlock_irq(&dd->uctxt_lock); + + return pollflag; +} + +static unsigned int qib_poll(struct file *fp, struct poll_table_struct *pt) +{ + struct qib_ctxtdata *rcd; + unsigned pollflag; + + rcd = ctxt_fp(fp); + if (!rcd) + pollflag = POLLERR; + else if (rcd->poll_type == QIB_POLL_TYPE_URGENT) + pollflag = qib_poll_urgent(rcd, fp, pt); + else if (rcd->poll_type == QIB_POLL_TYPE_ANYRCV) + pollflag = qib_poll_next(rcd, fp, pt); + else /* invalid */ + pollflag = POLLERR; + + return pollflag; +} + +/* + * Check that userland and driver are compatible for subcontexts. + */ +static int qib_compatible_subctxts(int user_swmajor, int user_swminor) +{ + /* this code is written long-hand for clarity */ + if (QIB_USER_SWMAJOR != user_swmajor) { + /* no promise of compatibility if major mismatch */ + return 0; + } + if (QIB_USER_SWMAJOR == 1) { + switch (QIB_USER_SWMINOR) { + case 0: + case 1: + case 2: + /* no subctxt implementation so cannot be compatible */ + return 0; + case 3: + /* 3 is only compatible with itself */ + return user_swminor == 3; + default: + /* >= 4 are compatible (or are expected to be) */ + return user_swminor >= 4; + } + } + /* make no promises yet for future major versions */ + return 0; +} + +static int init_subctxts(struct qib_devdata *dd, + struct qib_ctxtdata *rcd, + const struct qib_user_info *uinfo) +{ + int ret = 0; + unsigned num_subctxts; + size_t size; + + /* + * If the user is requesting zero subctxts, + * skip the subctxt allocation. + */ + if (uinfo->spu_subctxt_cnt <= 0) + goto bail; + num_subctxts = uinfo->spu_subctxt_cnt; + + /* Check for subctxt compatibility */ + if (!qib_compatible_subctxts(uinfo->spu_userversion >> 16, + uinfo->spu_userversion & 0xffff)) { + qib_devinfo(dd->pcidev, + "Mismatched user version (%d.%d) and driver " + "version (%d.%d) while context sharing. Ensure " + "that driver and library are from the same " + "release.\n", + (int) (uinfo->spu_userversion >> 16), + (int) (uinfo->spu_userversion & 0xffff), + QIB_USER_SWMAJOR, QIB_USER_SWMINOR); + goto bail; + } + if (num_subctxts > QLOGIC_IB_MAX_SUBCTXT) { + ret = -EINVAL; + goto bail; + } + + rcd->subctxt_uregbase = vmalloc_user(PAGE_SIZE * num_subctxts); + if (!rcd->subctxt_uregbase) { + ret = -ENOMEM; + goto bail; + } + /* Note: rcd->rcvhdrq_size isn't initialized yet. */ + size = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize * + sizeof(u32), PAGE_SIZE) * num_subctxts; + rcd->subctxt_rcvhdr_base = vmalloc_user(size); + if (!rcd->subctxt_rcvhdr_base) { + ret = -ENOMEM; + goto bail_ureg; + } + + rcd->subctxt_rcvegrbuf = vmalloc_user(rcd->rcvegrbuf_chunks * + rcd->rcvegrbuf_size * + num_subctxts); + if (!rcd->subctxt_rcvegrbuf) { + ret = -ENOMEM; + goto bail_rhdr; + } + + rcd->subctxt_cnt = uinfo->spu_subctxt_cnt; + rcd->subctxt_id = uinfo->spu_subctxt_id; + rcd->active_slaves = 1; + rcd->redirect_seq_cnt = 1; + set_bit(QIB_CTXT_MASTER_UNINIT, &rcd->flag); + goto bail; + +bail_rhdr: + vfree(rcd->subctxt_rcvhdr_base); +bail_ureg: + vfree(rcd->subctxt_uregbase); + rcd->subctxt_uregbase = NULL; +bail: + return ret; +} + +static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, + struct file *fp, const struct qib_user_info *uinfo) +{ + struct qib_devdata *dd = ppd->dd; + struct qib_ctxtdata *rcd; + void *ptmp = NULL; + int ret; + + rcd = qib_create_ctxtdata(ppd, ctxt); + + /* + * Allocate memory for use in qib_tid_update() at open to + * reduce cost of expected send setup per message segment + */ + if (rcd) + ptmp = kmalloc(dd->rcvtidcnt * sizeof(u16) + + dd->rcvtidcnt * sizeof(struct page **), + GFP_KERNEL); + + if (!rcd || !ptmp) { + qib_dev_err(dd, "Unable to allocate ctxtdata " + "memory, failing open\n"); + ret = -ENOMEM; + goto bailerr; + } + rcd->userversion = uinfo->spu_userversion; + ret = init_subctxts(dd, rcd, uinfo); + if (ret) + goto bailerr; + rcd->tid_pg_list = ptmp; + rcd->pid = current->pid; + init_waitqueue_head(&dd->rcd[ctxt]->wait); + strlcpy(rcd->comm, current->comm, sizeof(rcd->comm)); + ctxt_fp(fp) = rcd; + qib_stats.sps_ctxts++; + dd->freectxts--; + ret = 0; + goto bail; + +bailerr: + dd->rcd[ctxt] = NULL; + kfree(rcd); + kfree(ptmp); +bail: + return ret; +} + +static inline int usable(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + + return dd && (dd->flags & QIB_PRESENT) && dd->kregbase && ppd->lid && + (ppd->lflags & QIBL_LINKACTIVE); +} + +/* + * Select a context on the given device, either using a requested port + * or the port based on the context number. + */ +static int choose_port_ctxt(struct file *fp, struct qib_devdata *dd, u32 port, + const struct qib_user_info *uinfo) +{ + struct qib_pportdata *ppd = NULL; + int ret, ctxt; + + if (port) { + if (!usable(dd->pport + port - 1)) { + ret = -ENETDOWN; + goto done; + } else + ppd = dd->pport + port - 1; + } + for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts && dd->rcd[ctxt]; + ctxt++) + ; + if (ctxt == dd->cfgctxts) { + ret = -EBUSY; + goto done; + } + if (!ppd) { + u32 pidx = ctxt % dd->num_pports; + if (usable(dd->pport + pidx)) + ppd = dd->pport + pidx; + else { + for (pidx = 0; pidx < dd->num_pports && !ppd; + pidx++) + if (usable(dd->pport + pidx)) + ppd = dd->pport + pidx; + } + } + ret = ppd ? setup_ctxt(ppd, ctxt, fp, uinfo) : -ENETDOWN; +done: + return ret; +} + +static int find_free_ctxt(int unit, struct file *fp, + const struct qib_user_info *uinfo) +{ + struct qib_devdata *dd = qib_lookup(unit); + int ret; + + if (!dd || (uinfo->spu_port && uinfo->spu_port > dd->num_pports)) + ret = -ENODEV; + else + ret = choose_port_ctxt(fp, dd, uinfo->spu_port, uinfo); + + return ret; +} + +static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo, + unsigned alg) +{ + struct qib_devdata *udd = NULL; + int ret = 0, devmax, npresent, nup, ndev, dusable = 0, i; + u32 port = uinfo->spu_port, ctxt; + + devmax = qib_count_units(&npresent, &nup); + if (!npresent) { + ret = -ENXIO; + goto done; + } + if (nup == 0) { + ret = -ENETDOWN; + goto done; + } + + if (alg == QIB_PORT_ALG_ACROSS) { + unsigned inuse = ~0U; + /* find device (with ACTIVE ports) with fewest ctxts in use */ + for (ndev = 0; ndev < devmax; ndev++) { + struct qib_devdata *dd = qib_lookup(ndev); + unsigned cused = 0, cfree = 0, pusable = 0; + if (!dd) + continue; + if (port && port <= dd->num_pports && + usable(dd->pport + port - 1)) + pusable = 1; + else + for (i = 0; i < dd->num_pports; i++) + if (usable(dd->pport + i)) + pusable++; + if (!pusable) + continue; + for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; + ctxt++) + if (dd->rcd[ctxt]) + cused++; + else + cfree++; + if (pusable && cfree && cused < inuse) { + udd = dd; + inuse = cused; + } + } + if (udd) { + ret = choose_port_ctxt(fp, udd, port, uinfo); + goto done; + } + } else { + for (ndev = 0; ndev < devmax; ndev++) { + struct qib_devdata *dd = qib_lookup(ndev); + if (dd) { + ret = choose_port_ctxt(fp, dd, port, uinfo); + if (!ret) + goto done; + if (ret == -EBUSY) + dusable++; + } + } + } + ret = dusable ? -EBUSY : -ENETDOWN; + +done: + return ret; +} + +static int find_shared_ctxt(struct file *fp, + const struct qib_user_info *uinfo) +{ + int devmax, ndev, i; + int ret = 0; + + devmax = qib_count_units(NULL, NULL); + + for (ndev = 0; ndev < devmax; ndev++) { + struct qib_devdata *dd = qib_lookup(ndev); + + /* device portion of usable() */ + if (!(dd && (dd->flags & QIB_PRESENT) && dd->kregbase)) + continue; + for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) { + struct qib_ctxtdata *rcd = dd->rcd[i]; + + /* Skip ctxts which are not yet open */ + if (!rcd || !rcd->cnt) + continue; + /* Skip ctxt if it doesn't match the requested one */ + if (rcd->subctxt_id != uinfo->spu_subctxt_id) + continue; + /* Verify the sharing process matches the master */ + if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt || + rcd->userversion != uinfo->spu_userversion || + rcd->cnt >= rcd->subctxt_cnt) { + ret = -EINVAL; + goto done; + } + ctxt_fp(fp) = rcd; + subctxt_fp(fp) = rcd->cnt++; + rcd->subpid[subctxt_fp(fp)] = current->pid; + tidcursor_fp(fp) = 0; + rcd->active_slaves |= 1 << subctxt_fp(fp); + ret = 1; + goto done; + } + } + +done: + return ret; +} + +static int qib_open(struct inode *in, struct file *fp) +{ + /* The real work is performed later in qib_assign_ctxt() */ + fp->private_data = kzalloc(sizeof(struct qib_filedata), GFP_KERNEL); + if (fp->private_data) /* no cpu affinity by default */ + ((struct qib_filedata *)fp->private_data)->rec_cpu_num = -1; + return fp->private_data ? 0 : -ENOMEM; +} + +/* + * Get ctxt early, so can set affinity prior to memory allocation. + */ +static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) +{ + int ret; + int i_minor; + unsigned swmajor, swminor, alg = QIB_PORT_ALG_ACROSS; + + /* Check to be sure we haven't already initialized this file */ + if (ctxt_fp(fp)) { + ret = -EINVAL; + goto done; + } + + /* for now, if major version is different, bail */ + swmajor = uinfo->spu_userversion >> 16; + if (swmajor != QIB_USER_SWMAJOR) { + ret = -ENODEV; + goto done; + } + + swminor = uinfo->spu_userversion & 0xffff; + + if (swminor >= 11 && uinfo->spu_port_alg < QIB_PORT_ALG_COUNT) + alg = uinfo->spu_port_alg; + + mutex_lock(&qib_mutex); + + if (qib_compatible_subctxts(swmajor, swminor) && + uinfo->spu_subctxt_cnt) { + ret = find_shared_ctxt(fp, uinfo); + if (ret) { + if (ret > 0) + ret = 0; + goto done_chk_sdma; + } + } + + i_minor = iminor(fp->f_dentry->d_inode) - QIB_USER_MINOR_BASE; + if (i_minor) + ret = find_free_ctxt(i_minor - 1, fp, uinfo); + else + ret = get_a_ctxt(fp, uinfo, alg); + +done_chk_sdma: + if (!ret) { + struct qib_filedata *fd = fp->private_data; + const struct qib_ctxtdata *rcd = fd->rcd; + const struct qib_devdata *dd = rcd->dd; + unsigned int weight; + + if (dd->flags & QIB_HAS_SEND_DMA) { + fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, + dd->unit, + rcd->ctxt, + fd->subctxt); + if (!fd->pq) + ret = -ENOMEM; + } + + /* + * If process has NOT already set it's affinity, select and + * reserve a processor for it, as a rendezvous for all + * users of the driver. If they don't actually later + * set affinity to this cpu, or set it to some other cpu, + * it just means that sooner or later we don't recommend + * a cpu, and let the scheduler do it's best. + */ + weight = cpumask_weight(tsk_cpus_allowed(current)); + if (!ret && weight >= qib_cpulist_count) { + int cpu; + cpu = find_first_zero_bit(qib_cpulist, + qib_cpulist_count); + if (cpu != qib_cpulist_count) { + __set_bit(cpu, qib_cpulist); + fd->rec_cpu_num = cpu; + } + } else if (weight == 1 && + test_bit(cpumask_first(tsk_cpus_allowed(current)), + qib_cpulist)) + qib_devinfo(dd->pcidev, "%s PID %u affinity " + "set to cpu %d; already allocated\n", + current->comm, current->pid, + cpumask_first(tsk_cpus_allowed(current))); + } + + mutex_unlock(&qib_mutex); + +done: + return ret; +} + + +static int qib_do_user_init(struct file *fp, + const struct qib_user_info *uinfo) +{ + int ret; + struct qib_ctxtdata *rcd = ctxt_fp(fp); + struct qib_devdata *dd; + unsigned uctxt; + + /* Subctxts don't need to initialize anything since master did it. */ + if (subctxt_fp(fp)) { + ret = wait_event_interruptible(rcd->wait, + !test_bit(QIB_CTXT_MASTER_UNINIT, &rcd->flag)); + goto bail; + } + + dd = rcd->dd; + + /* some ctxts may get extra buffers, calculate that here */ + uctxt = rcd->ctxt - dd->first_user_ctxt; + if (uctxt < dd->ctxts_extrabuf) { + rcd->piocnt = dd->pbufsctxt + 1; + rcd->pio_base = rcd->piocnt * uctxt; + } else { + rcd->piocnt = dd->pbufsctxt; + rcd->pio_base = rcd->piocnt * uctxt + + dd->ctxts_extrabuf; + } + + /* + * All user buffers are 2KB buffers. If we ever support + * giving 4KB buffers to user processes, this will need some + * work. Can't use piobufbase directly, because it has + * both 2K and 4K buffer base values. So check and handle. + */ + if ((rcd->pio_base + rcd->piocnt) > dd->piobcnt2k) { + if (rcd->pio_base >= dd->piobcnt2k) { + qib_dev_err(dd, + "%u:ctxt%u: no 2KB buffers available\n", + dd->unit, rcd->ctxt); + ret = -ENOBUFS; + goto bail; + } + rcd->piocnt = dd->piobcnt2k - rcd->pio_base; + qib_dev_err(dd, "Ctxt%u: would use 4KB bufs, using %u\n", + rcd->ctxt, rcd->piocnt); + } + + rcd->piobufs = dd->pio2k_bufbase + rcd->pio_base * dd->palign; + qib_chg_pioavailkernel(dd, rcd->pio_base, rcd->piocnt, + TXCHK_CHG_TYPE_USER, rcd); + /* + * try to ensure that processes start up with consistent avail update + * for their own range, at least. If system very quiet, it might + * have the in-memory copy out of date at startup for this range of + * buffers, when a context gets re-used. Do after the chg_pioavail + * and before the rest of setup, so it's "almost certain" the dma + * will have occurred (can't 100% guarantee, but should be many + * decimals of 9s, with this ordering), given how much else happens + * after this. + */ + dd->f_sendctrl(dd->pport, QIB_SENDCTRL_AVAIL_BLIP); + + /* + * Now allocate the rcvhdr Q and eager TIDs; skip the TID + * array for time being. If rcd->ctxt > chip-supported, + * we need to do extra stuff here to handle by handling overflow + * through ctxt 0, someday + */ + ret = qib_create_rcvhdrq(dd, rcd); + if (!ret) + ret = qib_setup_eagerbufs(rcd); + if (ret) + goto bail_pio; + + rcd->tidcursor = 0; /* start at beginning after open */ + + /* initialize poll variables... */ + rcd->urgent = 0; + rcd->urgent_poll = 0; + + /* + * Now enable the ctxt for receive. + * For chips that are set to DMA the tail register to memory + * when they change (and when the update bit transitions from + * 0 to 1. So for those chips, we turn it off and then back on. + * This will (very briefly) affect any other open ctxts, but the + * duration is very short, and therefore isn't an issue. We + * explicitly set the in-memory tail copy to 0 beforehand, so we + * don't have to wait to be sure the DMA update has happened + * (chip resets head/tail to 0 on transition to enable). + */ + if (rcd->rcvhdrtail_kvaddr) + qib_clear_rcvhdrtail(rcd); + + dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_TIDFLOW_ENB, + rcd->ctxt); + + /* Notify any waiting slaves */ + if (rcd->subctxt_cnt) { + clear_bit(QIB_CTXT_MASTER_UNINIT, &rcd->flag); + wake_up(&rcd->wait); + } + return 0; + +bail_pio: + qib_chg_pioavailkernel(dd, rcd->pio_base, rcd->piocnt, + TXCHK_CHG_TYPE_KERN, rcd); +bail: + return ret; +} + +/** + * unlock_exptid - unlock any expected TID entries context still had in use + * @rcd: ctxt + * + * We don't actually update the chip here, because we do a bulk update + * below, using f_clear_tids. + */ +static void unlock_expected_tids(struct qib_ctxtdata *rcd) +{ + struct qib_devdata *dd = rcd->dd; + int ctxt_tidbase = rcd->ctxt * dd->rcvtidcnt; + int i, cnt = 0, maxtid = ctxt_tidbase + dd->rcvtidcnt; + + for (i = ctxt_tidbase; i < maxtid; i++) { + struct page *p = dd->pageshadow[i]; + dma_addr_t phys; + + if (!p) + continue; + + phys = dd->physshadow[i]; + dd->physshadow[i] = dd->tidinvalid; + dd->pageshadow[i] = NULL; + pci_unmap_page(dd->pcidev, phys, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + qib_release_user_pages(&p, 1); + cnt++; + } +} + +static int qib_close(struct inode *in, struct file *fp) +{ + int ret = 0; + struct qib_filedata *fd; + struct qib_ctxtdata *rcd; + struct qib_devdata *dd; + unsigned long flags; + unsigned ctxt; + pid_t pid; + + mutex_lock(&qib_mutex); + + fd = fp->private_data; + fp->private_data = NULL; + rcd = fd->rcd; + if (!rcd) { + mutex_unlock(&qib_mutex); + goto bail; + } + + dd = rcd->dd; + + /* ensure all pio buffer writes in progress are flushed */ + qib_flush_wc(); + + /* drain user sdma queue */ + if (fd->pq) { + qib_user_sdma_queue_drain(rcd->ppd, fd->pq); + qib_user_sdma_queue_destroy(fd->pq); + } + + if (fd->rec_cpu_num != -1) + __clear_bit(fd->rec_cpu_num, qib_cpulist); + + if (--rcd->cnt) { + /* + * XXX If the master closes the context before the slave(s), + * revoke the mmap for the eager receive queue so + * the slave(s) don't wait for receive data forever. + */ + rcd->active_slaves &= ~(1 << fd->subctxt); + rcd->subpid[fd->subctxt] = 0; + mutex_unlock(&qib_mutex); + goto bail; + } + + /* early; no interrupt users after this */ + spin_lock_irqsave(&dd->uctxt_lock, flags); + ctxt = rcd->ctxt; + dd->rcd[ctxt] = NULL; + pid = rcd->pid; + rcd->pid = 0; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + + if (rcd->rcvwait_to || rcd->piowait_to || + rcd->rcvnowait || rcd->pionowait) { + rcd->rcvwait_to = 0; + rcd->piowait_to = 0; + rcd->rcvnowait = 0; + rcd->pionowait = 0; + } + if (rcd->flag) + rcd->flag = 0; + + if (dd->kregbase) { + /* atomically clear receive enable ctxt and intr avail. */ + dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_CTXT_DIS | + QIB_RCVCTRL_INTRAVAIL_DIS, ctxt); + + /* clean up the pkeys for this ctxt user */ + qib_clean_part_key(rcd, dd); + qib_disarm_piobufs(dd, rcd->pio_base, rcd->piocnt); + qib_chg_pioavailkernel(dd, rcd->pio_base, + rcd->piocnt, TXCHK_CHG_TYPE_KERN, NULL); + + dd->f_clear_tids(dd, rcd); + + if (dd->pageshadow) + unlock_expected_tids(rcd); + qib_stats.sps_ctxts--; + dd->freectxts++; + } + + mutex_unlock(&qib_mutex); + qib_free_ctxtdata(dd, rcd); /* after releasing the mutex */ + +bail: + kfree(fd); + return ret; +} + +static int qib_ctxt_info(struct file *fp, struct qib_ctxt_info __user *uinfo) +{ + struct qib_ctxt_info info; + int ret; + size_t sz; + struct qib_ctxtdata *rcd = ctxt_fp(fp); + struct qib_filedata *fd; + + fd = fp->private_data; + + info.num_active = qib_count_active_units(); + info.unit = rcd->dd->unit; + info.port = rcd->ppd->port; + info.ctxt = rcd->ctxt; + info.subctxt = subctxt_fp(fp); + /* Number of user ctxts available for this device. */ + info.num_ctxts = rcd->dd->cfgctxts - rcd->dd->first_user_ctxt; + info.num_subctxts = rcd->subctxt_cnt; + info.rec_cpu = fd->rec_cpu_num; + sz = sizeof(info); + + if (copy_to_user(uinfo, &info, sz)) { + ret = -EFAULT; + goto bail; + } + ret = 0; + +bail: + return ret; +} + +static int qib_sdma_get_inflight(struct qib_user_sdma_queue *pq, + u32 __user *inflightp) +{ + const u32 val = qib_user_sdma_inflight_counter(pq); + + if (put_user(val, inflightp)) + return -EFAULT; + + return 0; +} + +static int qib_sdma_get_complete(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq, + u32 __user *completep) +{ + u32 val; + int err; + + if (!pq) + return -EINVAL; + + err = qib_user_sdma_make_progress(ppd, pq); + if (err < 0) + return err; + + val = qib_user_sdma_complete_counter(pq); + if (put_user(val, completep)) + return -EFAULT; + + return 0; +} + +static int disarm_req_delay(struct qib_ctxtdata *rcd) +{ + int ret = 0; + + if (!usable(rcd->ppd)) { + int i; + /* + * if link is down, or otherwise not usable, delay + * the caller up to 30 seconds, so we don't thrash + * in trying to get the chip back to ACTIVE, and + * set flag so they make the call again. + */ + if (rcd->user_event_mask) { + /* + * subctxt_cnt is 0 if not shared, so do base + * separately, first, then remaining subctxt, if any + */ + set_bit(_QIB_EVENT_DISARM_BUFS_BIT, + &rcd->user_event_mask[0]); + for (i = 1; i < rcd->subctxt_cnt; i++) + set_bit(_QIB_EVENT_DISARM_BUFS_BIT, + &rcd->user_event_mask[i]); + } + for (i = 0; !usable(rcd->ppd) && i < 300; i++) + msleep(100); + ret = -ENETDOWN; + } + return ret; +} + +/* + * Find all user contexts in use, and set the specified bit in their + * event mask. + * See also find_ctxt() for a similar use, that is specific to send buffers. + */ +int qib_set_uevent_bits(struct qib_pportdata *ppd, const int evtbit) +{ + struct qib_ctxtdata *rcd; + unsigned ctxt; + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&ppd->dd->uctxt_lock, flags); + for (ctxt = ppd->dd->first_user_ctxt; ctxt < ppd->dd->cfgctxts; + ctxt++) { + rcd = ppd->dd->rcd[ctxt]; + if (!rcd) + continue; + if (rcd->user_event_mask) { + int i; + /* + * subctxt_cnt is 0 if not shared, so do base + * separately, first, then remaining subctxt, if any + */ + set_bit(evtbit, &rcd->user_event_mask[0]); + for (i = 1; i < rcd->subctxt_cnt; i++) + set_bit(evtbit, &rcd->user_event_mask[i]); + } + ret = 1; + break; + } + spin_unlock_irqrestore(&ppd->dd->uctxt_lock, flags); + + return ret; +} + +/* + * clear the event notifier events for this context. + * For the DISARM_BUFS case, we also take action (this obsoletes + * the older QIB_CMD_DISARM_BUFS, but we keep it for backwards + * compatibility. + * Other bits don't currently require actions, just atomically clear. + * User process then performs actions appropriate to bit having been + * set, if desired, and checks again in future. + */ +static int qib_user_event_ack(struct qib_ctxtdata *rcd, int subctxt, + unsigned long events) +{ + int ret = 0, i; + + for (i = 0; i <= _QIB_MAX_EVENT_BIT; i++) { + if (!test_bit(i, &events)) + continue; + if (i == _QIB_EVENT_DISARM_BUFS_BIT) { + (void)qib_disarm_piobufs_ifneeded(rcd); + ret = disarm_req_delay(rcd); + } else + clear_bit(i, &rcd->user_event_mask[subctxt]); + } + return ret; +} + +static ssize_t qib_write(struct file *fp, const char __user *data, + size_t count, loff_t *off) +{ + const struct qib_cmd __user *ucmd; + struct qib_ctxtdata *rcd; + const void __user *src; + size_t consumed, copy = 0; + struct qib_cmd cmd; + ssize_t ret = 0; + void *dest; + + if (count < sizeof(cmd.type)) { + ret = -EINVAL; + goto bail; + } + + ucmd = (const struct qib_cmd __user *) data; + + if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) { + ret = -EFAULT; + goto bail; + } + + consumed = sizeof(cmd.type); + + switch (cmd.type) { + case QIB_CMD_ASSIGN_CTXT: + case QIB_CMD_USER_INIT: + copy = sizeof(cmd.cmd.user_info); + dest = &cmd.cmd.user_info; + src = &ucmd->cmd.user_info; + break; + + case QIB_CMD_RECV_CTRL: + copy = sizeof(cmd.cmd.recv_ctrl); + dest = &cmd.cmd.recv_ctrl; + src = &ucmd->cmd.recv_ctrl; + break; + + case QIB_CMD_CTXT_INFO: + copy = sizeof(cmd.cmd.ctxt_info); + dest = &cmd.cmd.ctxt_info; + src = &ucmd->cmd.ctxt_info; + break; + + case QIB_CMD_TID_UPDATE: + case QIB_CMD_TID_FREE: + copy = sizeof(cmd.cmd.tid_info); + dest = &cmd.cmd.tid_info; + src = &ucmd->cmd.tid_info; + break; + + case QIB_CMD_SET_PART_KEY: + copy = sizeof(cmd.cmd.part_key); + dest = &cmd.cmd.part_key; + src = &ucmd->cmd.part_key; + break; + + case QIB_CMD_DISARM_BUFS: + case QIB_CMD_PIOAVAILUPD: /* force an update of PIOAvail reg */ + copy = 0; + src = NULL; + dest = NULL; + break; + + case QIB_CMD_POLL_TYPE: + copy = sizeof(cmd.cmd.poll_type); + dest = &cmd.cmd.poll_type; + src = &ucmd->cmd.poll_type; + break; + + case QIB_CMD_ARMLAUNCH_CTRL: + copy = sizeof(cmd.cmd.armlaunch_ctrl); + dest = &cmd.cmd.armlaunch_ctrl; + src = &ucmd->cmd.armlaunch_ctrl; + break; + + case QIB_CMD_SDMA_INFLIGHT: + copy = sizeof(cmd.cmd.sdma_inflight); + dest = &cmd.cmd.sdma_inflight; + src = &ucmd->cmd.sdma_inflight; + break; + + case QIB_CMD_SDMA_COMPLETE: + copy = sizeof(cmd.cmd.sdma_complete); + dest = &cmd.cmd.sdma_complete; + src = &ucmd->cmd.sdma_complete; + break; + + case QIB_CMD_ACK_EVENT: + copy = sizeof(cmd.cmd.event_mask); + dest = &cmd.cmd.event_mask; + src = &ucmd->cmd.event_mask; + break; + + default: + ret = -EINVAL; + goto bail; + } + + if (copy) { + if ((count - consumed) < copy) { + ret = -EINVAL; + goto bail; + } + if (copy_from_user(dest, src, copy)) { + ret = -EFAULT; + goto bail; + } + consumed += copy; + } + + rcd = ctxt_fp(fp); + if (!rcd && cmd.type != QIB_CMD_ASSIGN_CTXT) { + ret = -EINVAL; + goto bail; + } + + switch (cmd.type) { + case QIB_CMD_ASSIGN_CTXT: + ret = qib_assign_ctxt(fp, &cmd.cmd.user_info); + if (ret) + goto bail; + break; + + case QIB_CMD_USER_INIT: + ret = qib_do_user_init(fp, &cmd.cmd.user_info); + if (ret) + goto bail; + ret = qib_get_base_info(fp, (void __user *) (unsigned long) + cmd.cmd.user_info.spu_base_info, + cmd.cmd.user_info.spu_base_info_size); + break; + + case QIB_CMD_RECV_CTRL: + ret = qib_manage_rcvq(rcd, subctxt_fp(fp), cmd.cmd.recv_ctrl); + break; + + case QIB_CMD_CTXT_INFO: + ret = qib_ctxt_info(fp, (struct qib_ctxt_info __user *) + (unsigned long) cmd.cmd.ctxt_info); + break; + + case QIB_CMD_TID_UPDATE: + ret = qib_tid_update(rcd, fp, &cmd.cmd.tid_info); + break; + + case QIB_CMD_TID_FREE: + ret = qib_tid_free(rcd, subctxt_fp(fp), &cmd.cmd.tid_info); + break; + + case QIB_CMD_SET_PART_KEY: + ret = qib_set_part_key(rcd, cmd.cmd.part_key); + break; + + case QIB_CMD_DISARM_BUFS: + (void)qib_disarm_piobufs_ifneeded(rcd); + ret = disarm_req_delay(rcd); + break; + + case QIB_CMD_PIOAVAILUPD: + qib_force_pio_avail_update(rcd->dd); + break; + + case QIB_CMD_POLL_TYPE: + rcd->poll_type = cmd.cmd.poll_type; + break; + + case QIB_CMD_ARMLAUNCH_CTRL: + rcd->dd->f_set_armlaunch(rcd->dd, cmd.cmd.armlaunch_ctrl); + break; + + case QIB_CMD_SDMA_INFLIGHT: + ret = qib_sdma_get_inflight(user_sdma_queue_fp(fp), + (u32 __user *) (unsigned long) + cmd.cmd.sdma_inflight); + break; + + case QIB_CMD_SDMA_COMPLETE: + ret = qib_sdma_get_complete(rcd->ppd, + user_sdma_queue_fp(fp), + (u32 __user *) (unsigned long) + cmd.cmd.sdma_complete); + break; + + case QIB_CMD_ACK_EVENT: + ret = qib_user_event_ack(rcd, subctxt_fp(fp), + cmd.cmd.event_mask); + break; + } + + if (ret >= 0) + ret = consumed; + +bail: + return ret; +} + +static ssize_t qib_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long dim, loff_t off) +{ + struct qib_filedata *fp = iocb->ki_filp->private_data; + struct qib_ctxtdata *rcd = ctxt_fp(iocb->ki_filp); + struct qib_user_sdma_queue *pq = fp->pq; + + if (!dim || !pq) + return -EINVAL; + + return qib_user_sdma_writev(rcd, pq, iov, dim); +} + +static struct class *qib_class; +static dev_t qib_dev; + +int qib_cdev_init(int minor, const char *name, + const struct file_operations *fops, + struct cdev **cdevp, struct device **devp) +{ + const dev_t dev = MKDEV(MAJOR(qib_dev), minor); + struct cdev *cdev; + struct device *device = NULL; + int ret; + + cdev = cdev_alloc(); + if (!cdev) { + printk(KERN_ERR QIB_DRV_NAME + ": Could not allocate cdev for minor %d, %s\n", + minor, name); + ret = -ENOMEM; + goto done; + } + + cdev->owner = THIS_MODULE; + cdev->ops = fops; + kobject_set_name(&cdev->kobj, name); + + ret = cdev_add(cdev, dev, 1); + if (ret < 0) { + printk(KERN_ERR QIB_DRV_NAME + ": Could not add cdev for minor %d, %s (err %d)\n", + minor, name, -ret); + goto err_cdev; + } + + device = device_create(qib_class, NULL, dev, NULL, name); + if (!IS_ERR(device)) + goto done; + ret = PTR_ERR(device); + device = NULL; + printk(KERN_ERR QIB_DRV_NAME ": Could not create " + "device for minor %d, %s (err %d)\n", + minor, name, -ret); +err_cdev: + cdev_del(cdev); + cdev = NULL; +done: + *cdevp = cdev; + *devp = device; + return ret; +} + +void qib_cdev_cleanup(struct cdev **cdevp, struct device **devp) +{ + struct device *device = *devp; + + if (device) { + device_unregister(device); + *devp = NULL; + } + + if (*cdevp) { + cdev_del(*cdevp); + *cdevp = NULL; + } +} + +static struct cdev *wildcard_cdev; +static struct device *wildcard_device; + +int __init qib_dev_init(void) +{ + int ret; + + ret = alloc_chrdev_region(&qib_dev, 0, QIB_NMINORS, QIB_DRV_NAME); + if (ret < 0) { + printk(KERN_ERR QIB_DRV_NAME ": Could not allocate " + "chrdev region (err %d)\n", -ret); + goto done; + } + + qib_class = class_create(THIS_MODULE, "ipath"); + if (IS_ERR(qib_class)) { + ret = PTR_ERR(qib_class); + printk(KERN_ERR QIB_DRV_NAME ": Could not create " + "device class (err %d)\n", -ret); + unregister_chrdev_region(qib_dev, QIB_NMINORS); + } + +done: + return ret; +} + +void qib_dev_cleanup(void) +{ + if (qib_class) { + class_destroy(qib_class); + qib_class = NULL; + } + + unregister_chrdev_region(qib_dev, QIB_NMINORS); +} + +static atomic_t user_count = ATOMIC_INIT(0); + +static void qib_user_remove(struct qib_devdata *dd) +{ + if (atomic_dec_return(&user_count) == 0) + qib_cdev_cleanup(&wildcard_cdev, &wildcard_device); + + qib_cdev_cleanup(&dd->user_cdev, &dd->user_device); +} + +static int qib_user_add(struct qib_devdata *dd) +{ + char name[10]; + int ret; + + if (atomic_inc_return(&user_count) == 1) { + ret = qib_cdev_init(0, "ipath", &qib_file_ops, + &wildcard_cdev, &wildcard_device); + if (ret) + goto done; + } + + snprintf(name, sizeof(name), "ipath%d", dd->unit); + ret = qib_cdev_init(dd->unit + 1, name, &qib_file_ops, + &dd->user_cdev, &dd->user_device); + if (ret) + qib_user_remove(dd); +done: + return ret; +} + +/* + * Create per-unit files in /dev + */ +int qib_device_create(struct qib_devdata *dd) +{ + int r, ret; + + r = qib_user_add(dd); + ret = qib_diag_add(dd); + if (r && !ret) + ret = r; + return ret; +} + +/* + * Remove per-unit files in /dev + * void, core kernel returns no errors for this stuff + */ +void qib_device_remove(struct qib_devdata *dd) +{ + qib_user_remove(dd); + qib_diag_remove(dd); +} diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c new file mode 100644 index 00000000..05e0f17c --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -0,0 +1,615 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "qib.h" + +#define QIBFS_MAGIC 0x726a77 + +static struct super_block *qib_super; + +#define private2dd(file) ((file)->f_dentry->d_inode->i_private) + +static int qibfs_mknod(struct inode *dir, struct dentry *dentry, + umode_t mode, const struct file_operations *fops, + void *data) +{ + int error; + struct inode *inode = new_inode(dir->i_sb); + + if (!inode) { + error = -EPERM; + goto bail; + } + + inode->i_ino = get_next_ino(); + inode->i_mode = mode; + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_blocks = 0; + inode->i_atime = CURRENT_TIME; + inode->i_mtime = inode->i_atime; + inode->i_ctime = inode->i_atime; + inode->i_private = data; + if (S_ISDIR(mode)) { + inode->i_op = &simple_dir_inode_operations; + inc_nlink(inode); + inc_nlink(dir); + } + + inode->i_fop = fops; + + d_instantiate(dentry, inode); + error = 0; + +bail: + return error; +} + +static int create_file(const char *name, umode_t mode, + struct dentry *parent, struct dentry **dentry, + const struct file_operations *fops, void *data) +{ + int error; + + *dentry = NULL; + mutex_lock(&parent->d_inode->i_mutex); + *dentry = lookup_one_len(name, parent, strlen(name)); + if (!IS_ERR(*dentry)) + error = qibfs_mknod(parent->d_inode, *dentry, + mode, fops, data); + else + error = PTR_ERR(*dentry); + mutex_unlock(&parent->d_inode->i_mutex); + + return error; +} + +static ssize_t driver_stats_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(buf, count, ppos, &qib_stats, + sizeof qib_stats); +} + +/* + * driver stats field names, one line per stat, single string. Used by + * programs like ipathstats to print the stats in a way which works for + * different versions of drivers, without changing program source. + * if qlogic_ib_stats changes, this needs to change. Names need to be + * 12 chars or less (w/o newline), for proper display by ipathstats utility. + */ +static const char qib_statnames[] = + "KernIntr\n" + "ErrorIntr\n" + "Tx_Errs\n" + "Rcv_Errs\n" + "H/W_Errs\n" + "NoPIOBufs\n" + "CtxtsOpen\n" + "RcvLen_Errs\n" + "EgrBufFull\n" + "EgrHdrFull\n" + ; + +static ssize_t driver_names_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(buf, count, ppos, qib_statnames, + sizeof qib_statnames - 1); /* no null */ +} + +static const struct file_operations driver_ops[] = { + { .read = driver_stats_read, .llseek = generic_file_llseek, }, + { .read = driver_names_read, .llseek = generic_file_llseek, }, +}; + +/* read the per-device counters */ +static ssize_t dev_counters_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + u64 *counters; + size_t avail; + struct qib_devdata *dd = private2dd(file); + + avail = dd->f_read_cntrs(dd, *ppos, NULL, &counters); + return simple_read_from_buffer(buf, count, ppos, counters, avail); +} + +/* read the per-device counters */ +static ssize_t dev_names_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + char *names; + size_t avail; + struct qib_devdata *dd = private2dd(file); + + avail = dd->f_read_cntrs(dd, *ppos, &names, NULL); + return simple_read_from_buffer(buf, count, ppos, names, avail); +} + +static const struct file_operations cntr_ops[] = { + { .read = dev_counters_read, .llseek = generic_file_llseek, }, + { .read = dev_names_read, .llseek = generic_file_llseek, }, +}; + +/* + * Could use file->f_dentry->d_inode->i_ino to figure out which file, + * instead of separate routine for each, but for now, this works... + */ + +/* read the per-port names (same for each port) */ +static ssize_t portnames_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + char *names; + size_t avail; + struct qib_devdata *dd = private2dd(file); + + avail = dd->f_read_portcntrs(dd, *ppos, 0, &names, NULL); + return simple_read_from_buffer(buf, count, ppos, names, avail); +} + +/* read the per-port counters for port 1 (pidx 0) */ +static ssize_t portcntrs_1_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + u64 *counters; + size_t avail; + struct qib_devdata *dd = private2dd(file); + + avail = dd->f_read_portcntrs(dd, *ppos, 0, NULL, &counters); + return simple_read_from_buffer(buf, count, ppos, counters, avail); +} + +/* read the per-port counters for port 2 (pidx 1) */ +static ssize_t portcntrs_2_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + u64 *counters; + size_t avail; + struct qib_devdata *dd = private2dd(file); + + avail = dd->f_read_portcntrs(dd, *ppos, 1, NULL, &counters); + return simple_read_from_buffer(buf, count, ppos, counters, avail); +} + +static const struct file_operations portcntr_ops[] = { + { .read = portnames_read, .llseek = generic_file_llseek, }, + { .read = portcntrs_1_read, .llseek = generic_file_llseek, }, + { .read = portcntrs_2_read, .llseek = generic_file_llseek, }, +}; + +/* + * read the per-port QSFP data for port 1 (pidx 0) + */ +static ssize_t qsfp_1_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct qib_devdata *dd = private2dd(file); + char *tmp; + int ret; + + tmp = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + ret = qib_qsfp_dump(dd->pport, tmp, PAGE_SIZE); + if (ret > 0) + ret = simple_read_from_buffer(buf, count, ppos, tmp, ret); + kfree(tmp); + return ret; +} + +/* + * read the per-port QSFP data for port 2 (pidx 1) + */ +static ssize_t qsfp_2_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct qib_devdata *dd = private2dd(file); + char *tmp; + int ret; + + if (dd->num_pports < 2) + return -ENODEV; + + tmp = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + ret = qib_qsfp_dump(dd->pport + 1, tmp, PAGE_SIZE); + if (ret > 0) + ret = simple_read_from_buffer(buf, count, ppos, tmp, ret); + kfree(tmp); + return ret; +} + +static const struct file_operations qsfp_ops[] = { + { .read = qsfp_1_read, .llseek = generic_file_llseek, }, + { .read = qsfp_2_read, .llseek = generic_file_llseek, }, +}; + +static ssize_t flash_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct qib_devdata *dd; + ssize_t ret; + loff_t pos; + char *tmp; + + pos = *ppos; + + if (pos < 0) { + ret = -EINVAL; + goto bail; + } + + if (pos >= sizeof(struct qib_flash)) { + ret = 0; + goto bail; + } + + if (count > sizeof(struct qib_flash) - pos) + count = sizeof(struct qib_flash) - pos; + + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) { + ret = -ENOMEM; + goto bail; + } + + dd = private2dd(file); + if (qib_eeprom_read(dd, pos, tmp, count)) { + qib_dev_err(dd, "failed to read from flash\n"); + ret = -ENXIO; + goto bail_tmp; + } + + if (copy_to_user(buf, tmp, count)) { + ret = -EFAULT; + goto bail_tmp; + } + + *ppos = pos + count; + ret = count; + +bail_tmp: + kfree(tmp); + +bail: + return ret; +} + +static ssize_t flash_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct qib_devdata *dd; + ssize_t ret; + loff_t pos; + char *tmp; + + pos = *ppos; + + if (pos != 0) { + ret = -EINVAL; + goto bail; + } + + if (count != sizeof(struct qib_flash)) { + ret = -EINVAL; + goto bail; + } + + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) { + ret = -ENOMEM; + goto bail; + } + + if (copy_from_user(tmp, buf, count)) { + ret = -EFAULT; + goto bail_tmp; + } + + dd = private2dd(file); + if (qib_eeprom_write(dd, pos, tmp, count)) { + ret = -ENXIO; + qib_dev_err(dd, "failed to write to flash\n"); + goto bail_tmp; + } + + *ppos = pos + count; + ret = count; + +bail_tmp: + kfree(tmp); + +bail: + return ret; +} + +static const struct file_operations flash_ops = { + .read = flash_read, + .write = flash_write, + .llseek = default_llseek, +}; + +static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) +{ + struct dentry *dir, *tmp; + char unit[10]; + int ret, i; + + /* create the per-unit directory */ + snprintf(unit, sizeof unit, "%u", dd->unit); + ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir, + &simple_dir_operations, dd); + if (ret) { + printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret); + goto bail; + } + + /* create the files in the new directory */ + ret = create_file("counters", S_IFREG|S_IRUGO, dir, &tmp, + &cntr_ops[0], dd); + if (ret) { + printk(KERN_ERR "create_file(%s/counters) failed: %d\n", + unit, ret); + goto bail; + } + ret = create_file("counter_names", S_IFREG|S_IRUGO, dir, &tmp, + &cntr_ops[1], dd); + if (ret) { + printk(KERN_ERR "create_file(%s/counter_names) failed: %d\n", + unit, ret); + goto bail; + } + ret = create_file("portcounter_names", S_IFREG|S_IRUGO, dir, &tmp, + &portcntr_ops[0], dd); + if (ret) { + printk(KERN_ERR "create_file(%s/%s) failed: %d\n", + unit, "portcounter_names", ret); + goto bail; + } + for (i = 1; i <= dd->num_pports; i++) { + char fname[24]; + + sprintf(fname, "port%dcounters", i); + /* create the files in the new directory */ + ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp, + &portcntr_ops[i], dd); + if (ret) { + printk(KERN_ERR "create_file(%s/%s) failed: %d\n", + unit, fname, ret); + goto bail; + } + if (!(dd->flags & QIB_HAS_QSFP)) + continue; + sprintf(fname, "qsfp%d", i); + ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp, + &qsfp_ops[i - 1], dd); + if (ret) { + printk(KERN_ERR "create_file(%s/%s) failed: %d\n", + unit, fname, ret); + goto bail; + } + } + + ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp, + &flash_ops, dd); + if (ret) + printk(KERN_ERR "create_file(%s/flash) failed: %d\n", + unit, ret); +bail: + return ret; +} + +static int remove_file(struct dentry *parent, char *name) +{ + struct dentry *tmp; + int ret; + + tmp = lookup_one_len(name, parent, strlen(name)); + + if (IS_ERR(tmp)) { + ret = PTR_ERR(tmp); + goto bail; + } + + spin_lock(&tmp->d_lock); + if (!(d_unhashed(tmp) && tmp->d_inode)) { + dget_dlock(tmp); + __d_drop(tmp); + spin_unlock(&tmp->d_lock); + simple_unlink(parent->d_inode, tmp); + } else { + spin_unlock(&tmp->d_lock); + } + + ret = 0; +bail: + /* + * We don't expect clients to care about the return value, but + * it's there if they need it. + */ + return ret; +} + +static int remove_device_files(struct super_block *sb, + struct qib_devdata *dd) +{ + struct dentry *dir, *root; + char unit[10]; + int ret, i; + + root = dget(sb->s_root); + mutex_lock(&root->d_inode->i_mutex); + snprintf(unit, sizeof unit, "%u", dd->unit); + dir = lookup_one_len(unit, root, strlen(unit)); + + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + printk(KERN_ERR "Lookup of %s failed\n", unit); + goto bail; + } + + remove_file(dir, "counters"); + remove_file(dir, "counter_names"); + remove_file(dir, "portcounter_names"); + for (i = 0; i < dd->num_pports; i++) { + char fname[24]; + + sprintf(fname, "port%dcounters", i + 1); + remove_file(dir, fname); + if (dd->flags & QIB_HAS_QSFP) { + sprintf(fname, "qsfp%d", i + 1); + remove_file(dir, fname); + } + } + remove_file(dir, "flash"); + d_delete(dir); + ret = simple_rmdir(root->d_inode, dir); + +bail: + mutex_unlock(&root->d_inode->i_mutex); + dput(root); + return ret; +} + +/* + * This fills everything in when the fs is mounted, to handle umount/mount + * after device init. The direct add_cntr_files() call handles adding + * them from the init code, when the fs is already mounted. + */ +static int qibfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct qib_devdata *dd, *tmp; + unsigned long flags; + int ret; + + static struct tree_descr files[] = { + [2] = {"driver_stats", &driver_ops[0], S_IRUGO}, + [3] = {"driver_stats_names", &driver_ops[1], S_IRUGO}, + {""}, + }; + + ret = simple_fill_super(sb, QIBFS_MAGIC, files); + if (ret) { + printk(KERN_ERR "simple_fill_super failed: %d\n", ret); + goto bail; + } + + spin_lock_irqsave(&qib_devs_lock, flags); + + list_for_each_entry_safe(dd, tmp, &qib_dev_list, list) { + spin_unlock_irqrestore(&qib_devs_lock, flags); + ret = add_cntr_files(sb, dd); + if (ret) + goto bail; + spin_lock_irqsave(&qib_devs_lock, flags); + } + + spin_unlock_irqrestore(&qib_devs_lock, flags); + +bail: + return ret; +} + +static struct dentry *qibfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + struct dentry *ret; + ret = mount_single(fs_type, flags, data, qibfs_fill_super); + if (!IS_ERR(ret)) + qib_super = ret->d_sb; + return ret; +} + +static void qibfs_kill_super(struct super_block *s) +{ + kill_litter_super(s); + qib_super = NULL; +} + +int qibfs_add(struct qib_devdata *dd) +{ + int ret; + + /* + * On first unit initialized, qib_super will not yet exist + * because nobody has yet tried to mount the filesystem, so + * we can't consider that to be an error; if an error occurs + * during the mount, that will get a complaint, so this is OK. + * add_cntr_files() for all units is done at mount from + * qibfs_fill_super(), so one way or another, everything works. + */ + if (qib_super == NULL) + ret = 0; + else + ret = add_cntr_files(qib_super, dd); + return ret; +} + +int qibfs_remove(struct qib_devdata *dd) +{ + int ret = 0; + + if (qib_super) + ret = remove_device_files(qib_super, dd); + + return ret; +} + +static struct file_system_type qibfs_fs_type = { + .owner = THIS_MODULE, + .name = "ipathfs", + .mount = qibfs_mount, + .kill_sb = qibfs_kill_super, +}; + +int __init qib_init_qibfs(void) +{ + return register_filesystem(&qibfs_fs_type); +} + +int __exit qib_exit_qibfs(void) +{ + return unregister_filesystem(&qibfs_fs_type); +} diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c new file mode 100644 index 00000000..d0c64d51 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -0,0 +1,3579 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +/* + * This file contains all of the code that is specific to the + * QLogic_IB 6120 PCIe chip. + */ + +#include +#include +#include +#include + +#include "qib.h" +#include "qib_6120_regs.h" + +static void qib_6120_setup_setextled(struct qib_pportdata *, u32); +static void sendctrl_6120_mod(struct qib_pportdata *ppd, u32 op); +static u8 qib_6120_phys_portstate(u64); +static u32 qib_6120_iblink_state(u64); + +/* + * This file contains all the chip-specific register information and + * access functions for the QLogic QLogic_IB PCI-Express chip. + * + */ + +/* KREG_IDX uses machine-generated #defines */ +#define KREG_IDX(regname) (QIB_6120_##regname##_OFFS / sizeof(u64)) + +/* Use defines to tie machine-generated names to lower-case names */ +#define kr_extctrl KREG_IDX(EXTCtrl) +#define kr_extstatus KREG_IDX(EXTStatus) +#define kr_gpio_clear KREG_IDX(GPIOClear) +#define kr_gpio_mask KREG_IDX(GPIOMask) +#define kr_gpio_out KREG_IDX(GPIOOut) +#define kr_gpio_status KREG_IDX(GPIOStatus) +#define kr_rcvctrl KREG_IDX(RcvCtrl) +#define kr_sendctrl KREG_IDX(SendCtrl) +#define kr_partitionkey KREG_IDX(RcvPartitionKey) +#define kr_hwdiagctrl KREG_IDX(HwDiagCtrl) +#define kr_ibcstatus KREG_IDX(IBCStatus) +#define kr_ibcctrl KREG_IDX(IBCCtrl) +#define kr_sendbuffererror KREG_IDX(SendBufErr0) +#define kr_rcvbthqp KREG_IDX(RcvBTHQP) +#define kr_counterregbase KREG_IDX(CntrRegBase) +#define kr_palign KREG_IDX(PageAlign) +#define kr_rcvegrbase KREG_IDX(RcvEgrBase) +#define kr_rcvegrcnt KREG_IDX(RcvEgrCnt) +#define kr_rcvhdrcnt KREG_IDX(RcvHdrCnt) +#define kr_rcvhdrentsize KREG_IDX(RcvHdrEntSize) +#define kr_rcvhdrsize KREG_IDX(RcvHdrSize) +#define kr_rcvtidbase KREG_IDX(RcvTIDBase) +#define kr_rcvtidcnt KREG_IDX(RcvTIDCnt) +#define kr_scratch KREG_IDX(Scratch) +#define kr_sendctrl KREG_IDX(SendCtrl) +#define kr_sendpioavailaddr KREG_IDX(SendPIOAvailAddr) +#define kr_sendpiobufbase KREG_IDX(SendPIOBufBase) +#define kr_sendpiobufcnt KREG_IDX(SendPIOBufCnt) +#define kr_sendpiosize KREG_IDX(SendPIOSize) +#define kr_sendregbase KREG_IDX(SendRegBase) +#define kr_userregbase KREG_IDX(UserRegBase) +#define kr_control KREG_IDX(Control) +#define kr_intclear KREG_IDX(IntClear) +#define kr_intmask KREG_IDX(IntMask) +#define kr_intstatus KREG_IDX(IntStatus) +#define kr_errclear KREG_IDX(ErrClear) +#define kr_errmask KREG_IDX(ErrMask) +#define kr_errstatus KREG_IDX(ErrStatus) +#define kr_hwerrclear KREG_IDX(HwErrClear) +#define kr_hwerrmask KREG_IDX(HwErrMask) +#define kr_hwerrstatus KREG_IDX(HwErrStatus) +#define kr_revision KREG_IDX(Revision) +#define kr_portcnt KREG_IDX(PortCnt) +#define kr_serdes_cfg0 KREG_IDX(SerdesCfg0) +#define kr_serdes_cfg1 (kr_serdes_cfg0 + 1) +#define kr_serdes_stat KREG_IDX(SerdesStat) +#define kr_xgxs_cfg KREG_IDX(XGXSCfg) + +/* These must only be written via qib_write_kreg_ctxt() */ +#define kr_rcvhdraddr KREG_IDX(RcvHdrAddr0) +#define kr_rcvhdrtailaddr KREG_IDX(RcvHdrTailAddr0) + +#define CREG_IDX(regname) ((QIB_6120_##regname##_OFFS - \ + QIB_6120_LBIntCnt_OFFS) / sizeof(u64)) + +#define cr_badformat CREG_IDX(RxBadFormatCnt) +#define cr_erricrc CREG_IDX(RxICRCErrCnt) +#define cr_errlink CREG_IDX(RxLinkProblemCnt) +#define cr_errlpcrc CREG_IDX(RxLPCRCErrCnt) +#define cr_errpkey CREG_IDX(RxPKeyMismatchCnt) +#define cr_rcvflowctrl_err CREG_IDX(RxFlowCtrlErrCnt) +#define cr_err_rlen CREG_IDX(RxLenErrCnt) +#define cr_errslen CREG_IDX(TxLenErrCnt) +#define cr_errtidfull CREG_IDX(RxTIDFullErrCnt) +#define cr_errtidvalid CREG_IDX(RxTIDValidErrCnt) +#define cr_errvcrc CREG_IDX(RxVCRCErrCnt) +#define cr_ibstatuschange CREG_IDX(IBStatusChangeCnt) +#define cr_lbint CREG_IDX(LBIntCnt) +#define cr_invalidrlen CREG_IDX(RxMaxMinLenErrCnt) +#define cr_invalidslen CREG_IDX(TxMaxMinLenErrCnt) +#define cr_lbflowstall CREG_IDX(LBFlowStallCnt) +#define cr_pktrcv CREG_IDX(RxDataPktCnt) +#define cr_pktrcvflowctrl CREG_IDX(RxFlowPktCnt) +#define cr_pktsend CREG_IDX(TxDataPktCnt) +#define cr_pktsendflow CREG_IDX(TxFlowPktCnt) +#define cr_portovfl CREG_IDX(RxP0HdrEgrOvflCnt) +#define cr_rcvebp CREG_IDX(RxEBPCnt) +#define cr_rcvovfl CREG_IDX(RxBufOvflCnt) +#define cr_senddropped CREG_IDX(TxDroppedPktCnt) +#define cr_sendstall CREG_IDX(TxFlowStallCnt) +#define cr_sendunderrun CREG_IDX(TxUnderrunCnt) +#define cr_wordrcv CREG_IDX(RxDwordCnt) +#define cr_wordsend CREG_IDX(TxDwordCnt) +#define cr_txunsupvl CREG_IDX(TxUnsupVLErrCnt) +#define cr_rxdroppkt CREG_IDX(RxDroppedPktCnt) +#define cr_iblinkerrrecov CREG_IDX(IBLinkErrRecoveryCnt) +#define cr_iblinkdown CREG_IDX(IBLinkDownedCnt) +#define cr_ibsymbolerr CREG_IDX(IBSymbolErrCnt) + +#define SYM_RMASK(regname, fldname) ((u64) \ + QIB_6120_##regname##_##fldname##_RMASK) +#define SYM_MASK(regname, fldname) ((u64) \ + QIB_6120_##regname##_##fldname##_RMASK << \ + QIB_6120_##regname##_##fldname##_LSB) +#define SYM_LSB(regname, fldname) (QIB_6120_##regname##_##fldname##_LSB) + +#define SYM_FIELD(value, regname, fldname) ((u64) \ + (((value) >> SYM_LSB(regname, fldname)) & \ + SYM_RMASK(regname, fldname))) +#define ERR_MASK(fldname) SYM_MASK(ErrMask, fldname##Mask) +#define HWE_MASK(fldname) SYM_MASK(HwErrMask, fldname##Mask) + +/* link training states, from IBC */ +#define IB_6120_LT_STATE_DISABLED 0x00 +#define IB_6120_LT_STATE_LINKUP 0x01 +#define IB_6120_LT_STATE_POLLACTIVE 0x02 +#define IB_6120_LT_STATE_POLLQUIET 0x03 +#define IB_6120_LT_STATE_SLEEPDELAY 0x04 +#define IB_6120_LT_STATE_SLEEPQUIET 0x05 +#define IB_6120_LT_STATE_CFGDEBOUNCE 0x08 +#define IB_6120_LT_STATE_CFGRCVFCFG 0x09 +#define IB_6120_LT_STATE_CFGWAITRMT 0x0a +#define IB_6120_LT_STATE_CFGIDLE 0x0b +#define IB_6120_LT_STATE_RECOVERRETRAIN 0x0c +#define IB_6120_LT_STATE_RECOVERWAITRMT 0x0e +#define IB_6120_LT_STATE_RECOVERIDLE 0x0f + +/* link state machine states from IBC */ +#define IB_6120_L_STATE_DOWN 0x0 +#define IB_6120_L_STATE_INIT 0x1 +#define IB_6120_L_STATE_ARM 0x2 +#define IB_6120_L_STATE_ACTIVE 0x3 +#define IB_6120_L_STATE_ACT_DEFER 0x4 + +static const u8 qib_6120_physportstate[0x20] = { + [IB_6120_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED, + [IB_6120_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP, + [IB_6120_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL, + [IB_6120_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL, + [IB_6120_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP, + [IB_6120_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP, + [IB_6120_LT_STATE_CFGDEBOUNCE] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_6120_LT_STATE_CFGRCVFCFG] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_6120_LT_STATE_CFGWAITRMT] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_6120_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_6120_LT_STATE_RECOVERRETRAIN] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [IB_6120_LT_STATE_RECOVERWAITRMT] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [IB_6120_LT_STATE_RECOVERIDLE] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN +}; + + +struct qib_chip_specific { + u64 __iomem *cregbase; + u64 *cntrs; + u64 *portcntrs; + void *dummy_hdrq; /* used after ctxt close */ + dma_addr_t dummy_hdrq_phys; + spinlock_t kernel_tid_lock; /* no back to back kernel TID writes */ + spinlock_t user_tid_lock; /* no back to back user TID writes */ + spinlock_t rcvmod_lock; /* protect rcvctrl shadow changes */ + spinlock_t gpio_lock; /* RMW of shadows/regs for ExtCtrl and GPIO */ + u64 hwerrmask; + u64 errormask; + u64 gpio_out; /* shadow of kr_gpio_out, for rmw ops */ + u64 gpio_mask; /* shadow the gpio mask register */ + u64 extctrl; /* shadow the gpio output enable, etc... */ + /* + * these 5 fields are used to establish deltas for IB symbol + * errors and linkrecovery errors. They can be reported on + * some chips during link negotiation prior to INIT, and with + * DDR when faking DDR negotiations with non-IBTA switches. + * The chip counters are adjusted at driver unload if there is + * a non-zero delta. + */ + u64 ibdeltainprog; + u64 ibsymdelta; + u64 ibsymsnap; + u64 iblnkerrdelta; + u64 iblnkerrsnap; + u64 ibcctrl; /* shadow for kr_ibcctrl */ + u32 lastlinkrecov; /* link recovery issue */ + int irq; + u32 cntrnamelen; + u32 portcntrnamelen; + u32 ncntrs; + u32 nportcntrs; + /* used with gpio interrupts to implement IB counters */ + u32 rxfc_unsupvl_errs; + u32 overrun_thresh_errs; + /* + * these count only cases where _successive_ LocalLinkIntegrity + * errors were seen in the receive headers of IB standard packets + */ + u32 lli_errs; + u32 lli_counter; + u64 lli_thresh; + u64 sword; /* total dwords sent (sample result) */ + u64 rword; /* total dwords received (sample result) */ + u64 spkts; /* total packets sent (sample result) */ + u64 rpkts; /* total packets received (sample result) */ + u64 xmit_wait; /* # of ticks no data sent (sample result) */ + struct timer_list pma_timer; + char emsgbuf[128]; + char bitsmsgbuf[64]; + u8 pma_sample_status; +}; + +/* ibcctrl bits */ +#define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1 +/* cycle through TS1/TS2 till OK */ +#define QLOGIC_IB_IBCC_LINKINITCMD_POLL 2 +/* wait for TS1, then go on */ +#define QLOGIC_IB_IBCC_LINKINITCMD_SLEEP 3 +#define QLOGIC_IB_IBCC_LINKINITCMD_SHIFT 16 + +#define QLOGIC_IB_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */ +#define QLOGIC_IB_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */ +#define QLOGIC_IB_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */ +#define QLOGIC_IB_IBCC_LINKCMD_SHIFT 18 + +/* + * We could have a single register get/put routine, that takes a group type, + * but this is somewhat clearer and cleaner. It also gives us some error + * checking. 64 bit register reads should always work, but are inefficient + * on opteron (the northbridge always generates 2 separate HT 32 bit reads), + * so we use kreg32 wherever possible. User register and counter register + * reads are always 32 bit reads, so only one form of those routines. + */ + +/** + * qib_read_ureg32 - read 32-bit virtualized per-context register + * @dd: device + * @regno: register number + * @ctxt: context number + * + * Return the contents of a register that is virtualized to be per context. + * Returns -1 on errors (not distinguishable from valid contents at + * runtime; we may add a separate error variable at some point). + */ +static inline u32 qib_read_ureg32(const struct qib_devdata *dd, + enum qib_ureg regno, int ctxt) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return 0; + + if (dd->userbase) + return readl(regno + (u64 __iomem *) + ((char __iomem *)dd->userbase + + dd->ureg_align * ctxt)); + else + return readl(regno + (u64 __iomem *) + (dd->uregbase + + (char __iomem *)dd->kregbase + + dd->ureg_align * ctxt)); +} + +/** + * qib_write_ureg - write 32-bit virtualized per-context register + * @dd: device + * @regno: register number + * @value: value + * @ctxt: context + * + * Write the contents of a register that is virtualized to be per context. + */ +static inline void qib_write_ureg(const struct qib_devdata *dd, + enum qib_ureg regno, u64 value, int ctxt) +{ + u64 __iomem *ubase; + if (dd->userbase) + ubase = (u64 __iomem *) + ((char __iomem *) dd->userbase + + dd->ureg_align * ctxt); + else + ubase = (u64 __iomem *) + (dd->uregbase + + (char __iomem *) dd->kregbase + + dd->ureg_align * ctxt); + + if (dd->kregbase && (dd->flags & QIB_PRESENT)) + writeq(value, &ubase[regno]); +} + +static inline u32 qib_read_kreg32(const struct qib_devdata *dd, + const u16 regno) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return -1; + return readl((u32 __iomem *)&dd->kregbase[regno]); +} + +static inline u64 qib_read_kreg64(const struct qib_devdata *dd, + const u16 regno) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return -1; + + return readq(&dd->kregbase[regno]); +} + +static inline void qib_write_kreg(const struct qib_devdata *dd, + const u16 regno, u64 value) +{ + if (dd->kregbase && (dd->flags & QIB_PRESENT)) + writeq(value, &dd->kregbase[regno]); +} + +/** + * qib_write_kreg_ctxt - write a device's per-ctxt 64-bit kernel register + * @dd: the qlogic_ib device + * @regno: the register number to write + * @ctxt: the context containing the register + * @value: the value to write + */ +static inline void qib_write_kreg_ctxt(const struct qib_devdata *dd, + const u16 regno, unsigned ctxt, + u64 value) +{ + qib_write_kreg(dd, regno + ctxt, value); +} + +static inline void write_6120_creg(const struct qib_devdata *dd, + u16 regno, u64 value) +{ + if (dd->cspec->cregbase && (dd->flags & QIB_PRESENT)) + writeq(value, &dd->cspec->cregbase[regno]); +} + +static inline u64 read_6120_creg(const struct qib_devdata *dd, u16 regno) +{ + if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readq(&dd->cspec->cregbase[regno]); +} + +static inline u32 read_6120_creg32(const struct qib_devdata *dd, u16 regno) +{ + if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readl(&dd->cspec->cregbase[regno]); +} + +/* kr_control bits */ +#define QLOGIC_IB_C_RESET 1U + +/* kr_intstatus, kr_intclear, kr_intmask bits */ +#define QLOGIC_IB_I_RCVURG_MASK ((1U << 5) - 1) +#define QLOGIC_IB_I_RCVURG_SHIFT 0 +#define QLOGIC_IB_I_RCVAVAIL_MASK ((1U << 5) - 1) +#define QLOGIC_IB_I_RCVAVAIL_SHIFT 12 + +#define QLOGIC_IB_C_FREEZEMODE 0x00000002 +#define QLOGIC_IB_C_LINKENABLE 0x00000004 +#define QLOGIC_IB_I_ERROR 0x0000000080000000ULL +#define QLOGIC_IB_I_SPIOSENT 0x0000000040000000ULL +#define QLOGIC_IB_I_SPIOBUFAVAIL 0x0000000020000000ULL +#define QLOGIC_IB_I_GPIO 0x0000000010000000ULL +#define QLOGIC_IB_I_BITSEXTANT \ + ((QLOGIC_IB_I_RCVURG_MASK << QLOGIC_IB_I_RCVURG_SHIFT) | \ + (QLOGIC_IB_I_RCVAVAIL_MASK << \ + QLOGIC_IB_I_RCVAVAIL_SHIFT) | \ + QLOGIC_IB_I_ERROR | QLOGIC_IB_I_SPIOSENT | \ + QLOGIC_IB_I_SPIOBUFAVAIL | QLOGIC_IB_I_GPIO) + +/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ +#define QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL +#define QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT 0 +#define QLOGIC_IB_HWE_PCIEPOISONEDTLP 0x0000000010000000ULL +#define QLOGIC_IB_HWE_PCIECPLTIMEOUT 0x0000000020000000ULL +#define QLOGIC_IB_HWE_PCIEBUSPARITYXTLH 0x0000000040000000ULL +#define QLOGIC_IB_HWE_PCIEBUSPARITYXADM 0x0000000080000000ULL +#define QLOGIC_IB_HWE_PCIEBUSPARITYRADM 0x0000000100000000ULL +#define QLOGIC_IB_HWE_COREPLL_FBSLIP 0x0080000000000000ULL +#define QLOGIC_IB_HWE_COREPLL_RFSLIP 0x0100000000000000ULL +#define QLOGIC_IB_HWE_PCIE1PLLFAILED 0x0400000000000000ULL +#define QLOGIC_IB_HWE_PCIE0PLLFAILED 0x0800000000000000ULL +#define QLOGIC_IB_HWE_SERDESPLLFAILED 0x1000000000000000ULL + + +/* kr_extstatus bits */ +#define QLOGIC_IB_EXTS_FREQSEL 0x2 +#define QLOGIC_IB_EXTS_SERDESSEL 0x4 +#define QLOGIC_IB_EXTS_MEMBIST_ENDTEST 0x0000000000004000 +#define QLOGIC_IB_EXTS_MEMBIST_FOUND 0x0000000000008000 + +/* kr_xgxsconfig bits */ +#define QLOGIC_IB_XGXS_RESET 0x5ULL + +#define _QIB_GPIO_SDA_NUM 1 +#define _QIB_GPIO_SCL_NUM 0 + +/* Bits in GPIO for the added IB link interrupts */ +#define GPIO_RXUVL_BIT 3 +#define GPIO_OVRUN_BIT 4 +#define GPIO_LLI_BIT 5 +#define GPIO_ERRINTR_MASK 0x38 + + +#define QLOGIC_IB_RT_BUFSIZE_MASK 0xe0000000ULL +#define QLOGIC_IB_RT_BUFSIZE_SHIFTVAL(tid) \ + ((((tid) & QLOGIC_IB_RT_BUFSIZE_MASK) >> 29) + 11 - 1) +#define QLOGIC_IB_RT_BUFSIZE(tid) (1 << QLOGIC_IB_RT_BUFSIZE_SHIFTVAL(tid)) +#define QLOGIC_IB_RT_IS_VALID(tid) \ + (((tid) & QLOGIC_IB_RT_BUFSIZE_MASK) && \ + ((((tid) & QLOGIC_IB_RT_BUFSIZE_MASK) != QLOGIC_IB_RT_BUFSIZE_MASK))) +#define QLOGIC_IB_RT_ADDR_MASK 0x1FFFFFFFULL /* 29 bits valid */ +#define QLOGIC_IB_RT_ADDR_SHIFT 10 + +#define QLOGIC_IB_R_INTRAVAIL_SHIFT 16 +#define QLOGIC_IB_R_TAILUPD_SHIFT 31 +#define IBA6120_R_PKEY_DIS_SHIFT 30 + +#define PBC_6120_VL15_SEND_CTRL (1ULL << 31) /* pbc; VL15; link_buf only */ + +#define IBCBUSFRSPCPARITYERR HWE_MASK(IBCBusFromSPCParityErr) +#define IBCBUSTOSPCPARITYERR HWE_MASK(IBCBusToSPCParityErr) + +#define SYM_MASK_BIT(regname, fldname, bit) ((u64) \ + ((1ULL << (SYM_LSB(regname, fldname) + (bit))))) + +#define TXEMEMPARITYERR_PIOBUF \ + SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 0) +#define TXEMEMPARITYERR_PIOPBC \ + SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 1) +#define TXEMEMPARITYERR_PIOLAUNCHFIFO \ + SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 2) + +#define RXEMEMPARITYERR_RCVBUF \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 0) +#define RXEMEMPARITYERR_LOOKUPQ \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 1) +#define RXEMEMPARITYERR_EXPTID \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 2) +#define RXEMEMPARITYERR_EAGERTID \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 3) +#define RXEMEMPARITYERR_FLAGBUF \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 4) +#define RXEMEMPARITYERR_DATAINFO \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 5) +#define RXEMEMPARITYERR_HDRINFO \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 6) + +/* 6120 specific hardware errors... */ +static const struct qib_hwerror_msgs qib_6120_hwerror_msgs[] = { + /* generic hardware errors */ + QLOGIC_IB_HWE_MSG(IBCBUSFRSPCPARITYERR, "QIB2IB Parity"), + QLOGIC_IB_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2QIB Parity"), + + QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOBUF, + "TXE PIOBUF Memory Parity"), + QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOPBC, + "TXE PIOPBC Memory Parity"), + QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOLAUNCHFIFO, + "TXE PIOLAUNCHFIFO Memory Parity"), + + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_RCVBUF, + "RXE RCVBUF Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_LOOKUPQ, + "RXE LOOKUPQ Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_EAGERTID, + "RXE EAGERTID Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_EXPTID, + "RXE EXPTID Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_FLAGBUF, + "RXE FLAGBUF Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_DATAINFO, + "RXE DATAINFO Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_HDRINFO, + "RXE HDRINFO Memory Parity"), + + /* chip-specific hardware errors */ + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEPOISONEDTLP, + "PCIe Poisoned TLP"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIECPLTIMEOUT, + "PCIe completion timeout"), + /* + * In practice, it's unlikely wthat we'll see PCIe PLL, or bus + * parity or memory parity error failures, because most likely we + * won't be able to talk to the core of the chip. Nonetheless, we + * might see them, if they are in parts of the PCIe core that aren't + * essential. + */ + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE1PLLFAILED, + "PCIePLL1"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE0PLLFAILED, + "PCIePLL0"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYXTLH, + "PCIe XTLH core parity"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYXADM, + "PCIe ADM TX core parity"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYRADM, + "PCIe ADM RX core parity"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_SERDESPLLFAILED, + "SerDes PLL"), +}; + +#define TXE_PIO_PARITY (TXEMEMPARITYERR_PIOBUF | TXEMEMPARITYERR_PIOPBC) +#define _QIB_PLL_FAIL (QLOGIC_IB_HWE_COREPLL_FBSLIP | \ + QLOGIC_IB_HWE_COREPLL_RFSLIP) + + /* variables for sanity checking interrupt and errors */ +#define IB_HWE_BITSEXTANT \ + (HWE_MASK(RXEMemParityErr) | \ + HWE_MASK(TXEMemParityErr) | \ + (QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK << \ + QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT) | \ + QLOGIC_IB_HWE_PCIE1PLLFAILED | \ + QLOGIC_IB_HWE_PCIE0PLLFAILED | \ + QLOGIC_IB_HWE_PCIEPOISONEDTLP | \ + QLOGIC_IB_HWE_PCIECPLTIMEOUT | \ + QLOGIC_IB_HWE_PCIEBUSPARITYXTLH | \ + QLOGIC_IB_HWE_PCIEBUSPARITYXADM | \ + QLOGIC_IB_HWE_PCIEBUSPARITYRADM | \ + HWE_MASK(PowerOnBISTFailed) | \ + QLOGIC_IB_HWE_COREPLL_FBSLIP | \ + QLOGIC_IB_HWE_COREPLL_RFSLIP | \ + QLOGIC_IB_HWE_SERDESPLLFAILED | \ + HWE_MASK(IBCBusToSPCParityErr) | \ + HWE_MASK(IBCBusFromSPCParityErr)) + +#define IB_E_BITSEXTANT \ + (ERR_MASK(RcvFormatErr) | ERR_MASK(RcvVCRCErr) | \ + ERR_MASK(RcvICRCErr) | ERR_MASK(RcvMinPktLenErr) | \ + ERR_MASK(RcvMaxPktLenErr) | ERR_MASK(RcvLongPktLenErr) | \ + ERR_MASK(RcvShortPktLenErr) | ERR_MASK(RcvUnexpectedCharErr) | \ + ERR_MASK(RcvUnsupportedVLErr) | ERR_MASK(RcvEBPErr) | \ + ERR_MASK(RcvIBFlowErr) | ERR_MASK(RcvBadVersionErr) | \ + ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr) | \ + ERR_MASK(RcvBadTidErr) | ERR_MASK(RcvHdrLenErr) | \ + ERR_MASK(RcvHdrErr) | ERR_MASK(RcvIBLostLinkErr) | \ + ERR_MASK(SendMinPktLenErr) | ERR_MASK(SendMaxPktLenErr) | \ + ERR_MASK(SendUnderRunErr) | ERR_MASK(SendPktLenErr) | \ + ERR_MASK(SendDroppedSmpPktErr) | \ + ERR_MASK(SendDroppedDataPktErr) | \ + ERR_MASK(SendPioArmLaunchErr) | \ + ERR_MASK(SendUnexpectedPktNumErr) | \ + ERR_MASK(SendUnsupportedVLErr) | ERR_MASK(IBStatusChanged) | \ + ERR_MASK(InvalidAddrErr) | ERR_MASK(ResetNegated) | \ + ERR_MASK(HardwareErr)) + +#define QLOGIC_IB_E_PKTERRS ( \ + ERR_MASK(SendPktLenErr) | \ + ERR_MASK(SendDroppedDataPktErr) | \ + ERR_MASK(RcvVCRCErr) | \ + ERR_MASK(RcvICRCErr) | \ + ERR_MASK(RcvShortPktLenErr) | \ + ERR_MASK(RcvEBPErr)) + +/* These are all rcv-related errors which we want to count for stats */ +#define E_SUM_PKTERRS \ + (ERR_MASK(RcvHdrLenErr) | ERR_MASK(RcvBadTidErr) | \ + ERR_MASK(RcvBadVersionErr) | ERR_MASK(RcvHdrErr) | \ + ERR_MASK(RcvLongPktLenErr) | ERR_MASK(RcvShortPktLenErr) | \ + ERR_MASK(RcvMaxPktLenErr) | ERR_MASK(RcvMinPktLenErr) | \ + ERR_MASK(RcvFormatErr) | ERR_MASK(RcvUnsupportedVLErr) | \ + ERR_MASK(RcvUnexpectedCharErr) | ERR_MASK(RcvEBPErr)) + +/* These are all send-related errors which we want to count for stats */ +#define E_SUM_ERRS \ + (ERR_MASK(SendPioArmLaunchErr) | \ + ERR_MASK(SendUnexpectedPktNumErr) | \ + ERR_MASK(SendDroppedDataPktErr) | \ + ERR_MASK(SendDroppedSmpPktErr) | \ + ERR_MASK(SendMaxPktLenErr) | ERR_MASK(SendUnsupportedVLErr) | \ + ERR_MASK(SendMinPktLenErr) | ERR_MASK(SendPktLenErr) | \ + ERR_MASK(InvalidAddrErr)) + +/* + * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore + * errors not related to freeze and cancelling buffers. Can't ignore + * armlaunch because could get more while still cleaning up, and need + * to cancel those as they happen. + */ +#define E_SPKT_ERRS_IGNORE \ + (ERR_MASK(SendDroppedDataPktErr) | \ + ERR_MASK(SendDroppedSmpPktErr) | \ + ERR_MASK(SendMaxPktLenErr) | ERR_MASK(SendMinPktLenErr) | \ + ERR_MASK(SendPktLenErr)) + +/* + * these are errors that can occur when the link changes state while + * a packet is being sent or received. This doesn't cover things + * like EBP or VCRC that can be the result of a sending having the + * link change state, so we receive a "known bad" packet. + */ +#define E_SUM_LINK_PKTERRS \ + (ERR_MASK(SendDroppedDataPktErr) | \ + ERR_MASK(SendDroppedSmpPktErr) | \ + ERR_MASK(SendMinPktLenErr) | ERR_MASK(SendPktLenErr) | \ + ERR_MASK(RcvShortPktLenErr) | ERR_MASK(RcvMinPktLenErr) | \ + ERR_MASK(RcvUnexpectedCharErr)) + +static void qib_6120_put_tid_2(struct qib_devdata *, u64 __iomem *, + u32, unsigned long); + +/* + * On platforms using this chip, and not having ordered WC stores, we + * can get TXE parity errors due to speculative reads to the PIO buffers, + * and this, due to a chip issue can result in (many) false parity error + * reports. So it's a debug print on those, and an info print on systems + * where the speculative reads don't occur. + */ +static void qib_6120_txe_recover(struct qib_devdata *dd) +{ + if (!qib_unordered_wc()) + qib_devinfo(dd->pcidev, + "Recovering from TXE PIO parity error\n"); +} + +/* enable/disable chip from delivering interrupts */ +static void qib_6120_set_intr_state(struct qib_devdata *dd, u32 enable) +{ + if (enable) { + if (dd->flags & QIB_BADINTR) + return; + qib_write_kreg(dd, kr_intmask, ~0ULL); + /* force re-interrupt of any pending interrupts. */ + qib_write_kreg(dd, kr_intclear, 0ULL); + } else + qib_write_kreg(dd, kr_intmask, 0ULL); +} + +/* + * Try to cleanup as much as possible for anything that might have gone + * wrong while in freeze mode, such as pio buffers being written by user + * processes (causing armlaunch), send errors due to going into freeze mode, + * etc., and try to avoid causing extra interrupts while doing so. + * Forcibly update the in-memory pioavail register copies after cleanup + * because the chip won't do it while in freeze mode (the register values + * themselves are kept correct). + * Make sure that we don't lose any important interrupts by using the chip + * feature that says that writing 0 to a bit in *clear that is set in + * *status will cause an interrupt to be generated again (if allowed by + * the *mask value). + * This is in chip-specific code because of all of the register accesses, + * even though the details are similar on most chips + */ +static void qib_6120_clear_freeze(struct qib_devdata *dd) +{ + /* disable error interrupts, to avoid confusion */ + qib_write_kreg(dd, kr_errmask, 0ULL); + + /* also disable interrupts; errormask is sometimes overwriten */ + qib_6120_set_intr_state(dd, 0); + + qib_cancel_sends(dd->pport); + + /* clear the freeze, and be sure chip saw it */ + qib_write_kreg(dd, kr_control, dd->control); + qib_read_kreg32(dd, kr_scratch); + + /* force in-memory update now we are out of freeze */ + qib_force_pio_avail_update(dd); + + /* + * force new interrupt if any hwerr, error or interrupt bits are + * still set, and clear "safe" send packet errors related to freeze + * and cancelling sends. Re-enable error interrupts before possible + * force of re-interrupt on pending interrupts. + */ + qib_write_kreg(dd, kr_hwerrclear, 0ULL); + qib_write_kreg(dd, kr_errclear, E_SPKT_ERRS_IGNORE); + qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); + qib_6120_set_intr_state(dd, 1); +} + +/** + * qib_handle_6120_hwerrors - display hardware errors. + * @dd: the qlogic_ib device + * @msg: the output buffer + * @msgl: the size of the output buffer + * + * Use same msg buffer as regular errors to avoid excessive stack + * use. Most hardware errors are catastrophic, but for right now, + * we'll print them and continue. Reuse the same message buffer as + * handle_6120_errors() to avoid excessive stack usage. + */ +static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, + size_t msgl) +{ + u64 hwerrs; + u32 bits, ctrl; + int isfatal = 0; + char *bitsmsg; + int log_idx; + + hwerrs = qib_read_kreg64(dd, kr_hwerrstatus); + if (!hwerrs) + return; + if (hwerrs == ~0ULL) { + qib_dev_err(dd, "Read of hardware error status failed " + "(all bits set); ignoring\n"); + return; + } + qib_stats.sps_hwerrs++; + + /* Always clear the error status register, except MEMBISTFAIL, + * regardless of whether we continue or stop using the chip. + * We want that set so we know it failed, even across driver reload. + * We'll still ignore it in the hwerrmask. We do this partly for + * diagnostics, but also for support */ + qib_write_kreg(dd, kr_hwerrclear, + hwerrs & ~HWE_MASK(PowerOnBISTFailed)); + + hwerrs &= dd->cspec->hwerrmask; + + /* We log some errors to EEPROM, check if we have any of those. */ + for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx) + if (hwerrs & dd->eep_st_masks[log_idx].hwerrs_to_log) + qib_inc_eeprom_err(dd, log_idx, 1); + + /* + * Make sure we get this much out, unless told to be quiet, + * or it's occurred within the last 5 seconds. + */ + if (hwerrs & ~(TXE_PIO_PARITY | RXEMEMPARITYERR_EAGERTID)) + qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx " + "(cleared)\n", (unsigned long long) hwerrs); + + if (hwerrs & ~IB_HWE_BITSEXTANT) + qib_dev_err(dd, "hwerror interrupt with unknown errors " + "%llx set\n", (unsigned long long) + (hwerrs & ~IB_HWE_BITSEXTANT)); + + ctrl = qib_read_kreg32(dd, kr_control); + if ((ctrl & QLOGIC_IB_C_FREEZEMODE) && !dd->diag_client) { + /* + * Parity errors in send memory are recoverable, + * just cancel the send (if indicated in * sendbuffererror), + * count the occurrence, unfreeze (if no other handled + * hardware error bits are set), and continue. They can + * occur if a processor speculative read is done to the PIO + * buffer while we are sending a packet, for example. + */ + if (hwerrs & TXE_PIO_PARITY) { + qib_6120_txe_recover(dd); + hwerrs &= ~TXE_PIO_PARITY; + } + + if (!hwerrs) { + static u32 freeze_cnt; + + freeze_cnt++; + qib_6120_clear_freeze(dd); + } else + isfatal = 1; + } + + *msg = '\0'; + + if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { + isfatal = 1; + strlcat(msg, "[Memory BIST test failed, InfiniPath hardware" + " unusable]", msgl); + /* ignore from now on, so disable until driver reloaded */ + dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + } + + qib_format_hwerrors(hwerrs, qib_6120_hwerror_msgs, + ARRAY_SIZE(qib_6120_hwerror_msgs), msg, msgl); + + bitsmsg = dd->cspec->bitsmsgbuf; + if (hwerrs & (QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK << + QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT)) { + bits = (u32) ((hwerrs >> + QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT) & + QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK); + snprintf(bitsmsg, sizeof dd->cspec->bitsmsgbuf, + "[PCIe Mem Parity Errs %x] ", bits); + strlcat(msg, bitsmsg, msgl); + } + + if (hwerrs & _QIB_PLL_FAIL) { + isfatal = 1; + snprintf(bitsmsg, sizeof dd->cspec->bitsmsgbuf, + "[PLL failed (%llx), InfiniPath hardware unusable]", + (unsigned long long) hwerrs & _QIB_PLL_FAIL); + strlcat(msg, bitsmsg, msgl); + /* ignore from now on, so disable until driver reloaded */ + dd->cspec->hwerrmask &= ~(hwerrs & _QIB_PLL_FAIL); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + } + + if (hwerrs & QLOGIC_IB_HWE_SERDESPLLFAILED) { + /* + * If it occurs, it is left masked since the external + * interface is unused + */ + dd->cspec->hwerrmask &= ~QLOGIC_IB_HWE_SERDESPLLFAILED; + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + } + + if (hwerrs) + /* + * if any set that we aren't ignoring; only + * make the complaint once, in case it's stuck + * or recurring, and we get here multiple + * times. + */ + qib_dev_err(dd, "%s hardware error\n", msg); + else + *msg = 0; /* recovered from all of them */ + + if (isfatal && !dd->diag_client) { + qib_dev_err(dd, "Fatal Hardware Error, no longer" + " usable, SN %.16s\n", dd->serial); + /* + * for /sys status file and user programs to print; if no + * trailing brace is copied, we'll know it was truncated. + */ + if (dd->freezemsg) + snprintf(dd->freezemsg, dd->freezelen, + "{%s}", msg); + qib_disable_after_error(dd); + } +} + +/* + * Decode the error status into strings, deciding whether to always + * print * it or not depending on "normal packet errors" vs everything + * else. Return 1 if "real" errors, otherwise 0 if only packet + * errors, so caller can decide what to print with the string. + */ +static int qib_decode_6120_err(struct qib_devdata *dd, char *buf, size_t blen, + u64 err) +{ + int iserr = 1; + + *buf = '\0'; + if (err & QLOGIC_IB_E_PKTERRS) { + if (!(err & ~QLOGIC_IB_E_PKTERRS)) + iserr = 0; + if ((err & ERR_MASK(RcvICRCErr)) && + !(err&(ERR_MASK(RcvVCRCErr)|ERR_MASK(RcvEBPErr)))) + strlcat(buf, "CRC ", blen); + if (!iserr) + goto done; + } + if (err & ERR_MASK(RcvHdrLenErr)) + strlcat(buf, "rhdrlen ", blen); + if (err & ERR_MASK(RcvBadTidErr)) + strlcat(buf, "rbadtid ", blen); + if (err & ERR_MASK(RcvBadVersionErr)) + strlcat(buf, "rbadversion ", blen); + if (err & ERR_MASK(RcvHdrErr)) + strlcat(buf, "rhdr ", blen); + if (err & ERR_MASK(RcvLongPktLenErr)) + strlcat(buf, "rlongpktlen ", blen); + if (err & ERR_MASK(RcvMaxPktLenErr)) + strlcat(buf, "rmaxpktlen ", blen); + if (err & ERR_MASK(RcvMinPktLenErr)) + strlcat(buf, "rminpktlen ", blen); + if (err & ERR_MASK(SendMinPktLenErr)) + strlcat(buf, "sminpktlen ", blen); + if (err & ERR_MASK(RcvFormatErr)) + strlcat(buf, "rformaterr ", blen); + if (err & ERR_MASK(RcvUnsupportedVLErr)) + strlcat(buf, "runsupvl ", blen); + if (err & ERR_MASK(RcvUnexpectedCharErr)) + strlcat(buf, "runexpchar ", blen); + if (err & ERR_MASK(RcvIBFlowErr)) + strlcat(buf, "ribflow ", blen); + if (err & ERR_MASK(SendUnderRunErr)) + strlcat(buf, "sunderrun ", blen); + if (err & ERR_MASK(SendPioArmLaunchErr)) + strlcat(buf, "spioarmlaunch ", blen); + if (err & ERR_MASK(SendUnexpectedPktNumErr)) + strlcat(buf, "sunexperrpktnum ", blen); + if (err & ERR_MASK(SendDroppedSmpPktErr)) + strlcat(buf, "sdroppedsmppkt ", blen); + if (err & ERR_MASK(SendMaxPktLenErr)) + strlcat(buf, "smaxpktlen ", blen); + if (err & ERR_MASK(SendUnsupportedVLErr)) + strlcat(buf, "sunsupVL ", blen); + if (err & ERR_MASK(InvalidAddrErr)) + strlcat(buf, "invalidaddr ", blen); + if (err & ERR_MASK(RcvEgrFullErr)) + strlcat(buf, "rcvegrfull ", blen); + if (err & ERR_MASK(RcvHdrFullErr)) + strlcat(buf, "rcvhdrfull ", blen); + if (err & ERR_MASK(IBStatusChanged)) + strlcat(buf, "ibcstatuschg ", blen); + if (err & ERR_MASK(RcvIBLostLinkErr)) + strlcat(buf, "riblostlink ", blen); + if (err & ERR_MASK(HardwareErr)) + strlcat(buf, "hardware ", blen); + if (err & ERR_MASK(ResetNegated)) + strlcat(buf, "reset ", blen); +done: + return iserr; +} + +/* + * Called when we might have an error that is specific to a particular + * PIO buffer, and may need to cancel that buffer, so it can be re-used. + */ +static void qib_disarm_6120_senderrbufs(struct qib_pportdata *ppd) +{ + unsigned long sbuf[2]; + struct qib_devdata *dd = ppd->dd; + + /* + * It's possible that sendbuffererror could have bits set; might + * have already done this as a result of hardware error handling. + */ + sbuf[0] = qib_read_kreg64(dd, kr_sendbuffererror); + sbuf[1] = qib_read_kreg64(dd, kr_sendbuffererror + 1); + + if (sbuf[0] || sbuf[1]) + qib_disarm_piobufs_set(dd, sbuf, + dd->piobcnt2k + dd->piobcnt4k); +} + +static int chk_6120_linkrecovery(struct qib_devdata *dd, u64 ibcs) +{ + int ret = 1; + u32 ibstate = qib_6120_iblink_state(ibcs); + u32 linkrecov = read_6120_creg32(dd, cr_iblinkerrrecov); + + if (linkrecov != dd->cspec->lastlinkrecov) { + /* and no more until active again */ + dd->cspec->lastlinkrecov = 0; + qib_set_linkstate(dd->pport, QIB_IB_LINKDOWN); + ret = 0; + } + if (ibstate == IB_PORT_ACTIVE) + dd->cspec->lastlinkrecov = + read_6120_creg32(dd, cr_iblinkerrrecov); + return ret; +} + +static void handle_6120_errors(struct qib_devdata *dd, u64 errs) +{ + char *msg; + u64 ignore_this_time = 0; + u64 iserr = 0; + int log_idx; + struct qib_pportdata *ppd = dd->pport; + u64 mask; + + /* don't report errors that are masked */ + errs &= dd->cspec->errormask; + msg = dd->cspec->emsgbuf; + + /* do these first, they are most important */ + if (errs & ERR_MASK(HardwareErr)) + qib_handle_6120_hwerrors(dd, msg, sizeof dd->cspec->emsgbuf); + else + for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx) + if (errs & dd->eep_st_masks[log_idx].errs_to_log) + qib_inc_eeprom_err(dd, log_idx, 1); + + if (errs & ~IB_E_BITSEXTANT) + qib_dev_err(dd, "error interrupt with unknown errors " + "%llx set\n", + (unsigned long long) (errs & ~IB_E_BITSEXTANT)); + + if (errs & E_SUM_ERRS) { + qib_disarm_6120_senderrbufs(ppd); + if ((errs & E_SUM_LINK_PKTERRS) && + !(ppd->lflags & QIBL_LINKACTIVE)) { + /* + * This can happen when trying to bring the link + * up, but the IB link changes state at the "wrong" + * time. The IB logic then complains that the packet + * isn't valid. We don't want to confuse people, so + * we just don't print them, except at debug + */ + ignore_this_time = errs & E_SUM_LINK_PKTERRS; + } + } else if ((errs & E_SUM_LINK_PKTERRS) && + !(ppd->lflags & QIBL_LINKACTIVE)) { + /* + * This can happen when SMA is trying to bring the link + * up, but the IB link changes state at the "wrong" time. + * The IB logic then complains that the packet isn't + * valid. We don't want to confuse people, so we just + * don't print them, except at debug + */ + ignore_this_time = errs & E_SUM_LINK_PKTERRS; + } + + qib_write_kreg(dd, kr_errclear, errs); + + errs &= ~ignore_this_time; + if (!errs) + goto done; + + /* + * The ones we mask off are handled specially below + * or above. + */ + mask = ERR_MASK(IBStatusChanged) | ERR_MASK(RcvEgrFullErr) | + ERR_MASK(RcvHdrFullErr) | ERR_MASK(HardwareErr); + qib_decode_6120_err(dd, msg, sizeof dd->cspec->emsgbuf, errs & ~mask); + + if (errs & E_SUM_PKTERRS) + qib_stats.sps_rcverrs++; + if (errs & E_SUM_ERRS) + qib_stats.sps_txerrs++; + + iserr = errs & ~(E_SUM_PKTERRS | QLOGIC_IB_E_PKTERRS); + + if (errs & ERR_MASK(IBStatusChanged)) { + u64 ibcs = qib_read_kreg64(dd, kr_ibcstatus); + u32 ibstate = qib_6120_iblink_state(ibcs); + int handle = 1; + + if (ibstate != IB_PORT_INIT && dd->cspec->lastlinkrecov) + handle = chk_6120_linkrecovery(dd, ibcs); + /* + * Since going into a recovery state causes the link state + * to go down and since recovery is transitory, it is better + * if we "miss" ever seeing the link training state go into + * recovery (i.e., ignore this transition for link state + * special handling purposes) without updating lastibcstat. + */ + if (handle && qib_6120_phys_portstate(ibcs) == + IB_PHYSPORTSTATE_LINK_ERR_RECOVER) + handle = 0; + if (handle) + qib_handle_e_ibstatuschanged(ppd, ibcs); + } + + if (errs & ERR_MASK(ResetNegated)) { + qib_dev_err(dd, "Got reset, requires re-init " + "(unload and reload driver)\n"); + dd->flags &= ~QIB_INITTED; /* needs re-init */ + /* mark as having had error */ + *dd->devstatusp |= QIB_STATUS_HWERROR; + *dd->pport->statusp &= ~QIB_STATUS_IB_CONF; + } + + if (*msg && iserr) + qib_dev_porterr(dd, ppd->port, "%s error\n", msg); + + if (ppd->state_wanted & ppd->lflags) + wake_up_interruptible(&ppd->state_wait); + + /* + * If there were hdrq or egrfull errors, wake up any processes + * waiting in poll. We used to try to check which contexts had + * the overflow, but given the cost of that and the chip reads + * to support it, it's better to just wake everybody up if we + * get an overflow; waiters can poll again if it's not them. + */ + if (errs & (ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr))) { + qib_handle_urcv(dd, ~0U); + if (errs & ERR_MASK(RcvEgrFullErr)) + qib_stats.sps_buffull++; + else + qib_stats.sps_hdrfull++; + } +done: + return; +} + +/** + * qib_6120_init_hwerrors - enable hardware errors + * @dd: the qlogic_ib device + * + * now that we have finished initializing everything that might reasonably + * cause a hardware error, and cleared those errors bits as they occur, + * we can enable hardware errors in the mask (potentially enabling + * freeze mode), and enable hardware errors as errors (along with + * everything else) in errormask + */ +static void qib_6120_init_hwerrors(struct qib_devdata *dd) +{ + u64 val; + u64 extsval; + + extsval = qib_read_kreg64(dd, kr_extstatus); + + if (!(extsval & QLOGIC_IB_EXTS_MEMBIST_ENDTEST)) + qib_dev_err(dd, "MemBIST did not complete!\n"); + + /* init so all hwerrors interrupt, and enter freeze, ajdust below */ + val = ~0ULL; + if (dd->minrev < 2) { + /* + * Avoid problem with internal interface bus parity + * checking. Fixed in Rev2. + */ + val &= ~QLOGIC_IB_HWE_PCIEBUSPARITYRADM; + } + /* avoid some intel cpu's speculative read freeze mode issue */ + val &= ~TXEMEMPARITYERR_PIOBUF; + + dd->cspec->hwerrmask = val; + + qib_write_kreg(dd, kr_hwerrclear, ~HWE_MASK(PowerOnBISTFailed)); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + + /* clear all */ + qib_write_kreg(dd, kr_errclear, ~0ULL); + /* enable errors that are masked, at least this first time. */ + qib_write_kreg(dd, kr_errmask, ~0ULL); + dd->cspec->errormask = qib_read_kreg64(dd, kr_errmask); + /* clear any interrupts up to this point (ints still not enabled) */ + qib_write_kreg(dd, kr_intclear, ~0ULL); + + qib_write_kreg(dd, kr_rcvbthqp, + dd->qpn_mask << (QIB_6120_RcvBTHQP_BTHQP_Mask_LSB - 1) | + QIB_KD_QP); +} + +/* + * Disable and enable the armlaunch error. Used for PIO bandwidth testing + * on chips that are count-based, rather than trigger-based. There is no + * reference counting, but that's also fine, given the intended use. + * Only chip-specific because it's all register accesses + */ +static void qib_set_6120_armlaunch(struct qib_devdata *dd, u32 enable) +{ + if (enable) { + qib_write_kreg(dd, kr_errclear, + ERR_MASK(SendPioArmLaunchErr)); + dd->cspec->errormask |= ERR_MASK(SendPioArmLaunchErr); + } else + dd->cspec->errormask &= ~ERR_MASK(SendPioArmLaunchErr); + qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); +} + +/* + * Formerly took parameter in pre-shifted, + * pre-merged form with LinkCmd and LinkInitCmd + * together, and assuming the zero was NOP. + */ +static void qib_set_ib_6120_lstate(struct qib_pportdata *ppd, u16 linkcmd, + u16 linitcmd) +{ + u64 mod_wd; + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + + if (linitcmd == QLOGIC_IB_IBCC_LINKINITCMD_DISABLE) { + /* + * If we are told to disable, note that so link-recovery + * code does not attempt to bring us back up. + */ + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_LINK_DISABLED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } else if (linitcmd || linkcmd == QLOGIC_IB_IBCC_LINKCMD_DOWN) { + /* + * Any other linkinitcmd will lead to LINKDOWN and then + * to INIT (if all is well), so clear flag to let + * link-recovery code attempt to bring us back up. + */ + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_LINK_DISABLED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } + + mod_wd = (linkcmd << QLOGIC_IB_IBCC_LINKCMD_SHIFT) | + (linitcmd << QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + + qib_write_kreg(dd, kr_ibcctrl, dd->cspec->ibcctrl | mod_wd); + /* write to chip to prevent back-to-back writes of control reg */ + qib_write_kreg(dd, kr_scratch, 0); +} + +/** + * qib_6120_bringup_serdes - bring up the serdes + * @dd: the qlogic_ib device + */ +static int qib_6120_bringup_serdes(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + u64 val, config1, prev_val, hwstat, ibc; + + /* Put IBC in reset, sends disabled */ + dd->control &= ~QLOGIC_IB_C_LINKENABLE; + qib_write_kreg(dd, kr_control, 0ULL); + + dd->cspec->ibdeltainprog = 1; + dd->cspec->ibsymsnap = read_6120_creg32(dd, cr_ibsymbolerr); + dd->cspec->iblnkerrsnap = read_6120_creg32(dd, cr_iblinkerrrecov); + + /* flowcontrolwatermark is in units of KBytes */ + ibc = 0x5ULL << SYM_LSB(IBCCtrl, FlowCtrlWaterMark); + /* + * How often flowctrl sent. More or less in usecs; balance against + * watermark value, so that in theory senders always get a flow + * control update in time to not let the IB link go idle. + */ + ibc |= 0x3ULL << SYM_LSB(IBCCtrl, FlowCtrlPeriod); + /* max error tolerance */ + dd->cspec->lli_thresh = 0xf; + ibc |= (u64) dd->cspec->lli_thresh << SYM_LSB(IBCCtrl, PhyerrThreshold); + /* use "real" buffer space for */ + ibc |= 4ULL << SYM_LSB(IBCCtrl, CreditScale); + /* IB credit flow control. */ + ibc |= 0xfULL << SYM_LSB(IBCCtrl, OverrunThreshold); + /* + * set initial max size pkt IBC will send, including ICRC; it's the + * PIO buffer size in dwords, less 1; also see qib_set_mtu() + */ + ibc |= ((u64)(ppd->ibmaxlen >> 2) + 1) << SYM_LSB(IBCCtrl, MaxPktLen); + dd->cspec->ibcctrl = ibc; /* without linkcmd or linkinitcmd! */ + + /* initially come up waiting for TS1, without sending anything. */ + val = dd->cspec->ibcctrl | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE << + QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + qib_write_kreg(dd, kr_ibcctrl, val); + + val = qib_read_kreg64(dd, kr_serdes_cfg0); + config1 = qib_read_kreg64(dd, kr_serdes_cfg1); + + /* + * Force reset on, also set rxdetect enable. Must do before reading + * serdesstatus at least for simulation, or some of the bits in + * serdes status will come back as undefined and cause simulation + * failures + */ + val |= SYM_MASK(SerdesCfg0, ResetPLL) | + SYM_MASK(SerdesCfg0, RxDetEnX) | + (SYM_MASK(SerdesCfg0, L1PwrDnA) | + SYM_MASK(SerdesCfg0, L1PwrDnB) | + SYM_MASK(SerdesCfg0, L1PwrDnC) | + SYM_MASK(SerdesCfg0, L1PwrDnD)); + qib_write_kreg(dd, kr_serdes_cfg0, val); + /* be sure chip saw it */ + qib_read_kreg64(dd, kr_scratch); + udelay(5); /* need pll reset set at least for a bit */ + /* + * after PLL is reset, set the per-lane Resets and TxIdle and + * clear the PLL reset and rxdetect (to get falling edge). + * Leave L1PWR bits set (permanently) + */ + val &= ~(SYM_MASK(SerdesCfg0, RxDetEnX) | + SYM_MASK(SerdesCfg0, ResetPLL) | + (SYM_MASK(SerdesCfg0, L1PwrDnA) | + SYM_MASK(SerdesCfg0, L1PwrDnB) | + SYM_MASK(SerdesCfg0, L1PwrDnC) | + SYM_MASK(SerdesCfg0, L1PwrDnD))); + val |= (SYM_MASK(SerdesCfg0, ResetA) | + SYM_MASK(SerdesCfg0, ResetB) | + SYM_MASK(SerdesCfg0, ResetC) | + SYM_MASK(SerdesCfg0, ResetD)) | + SYM_MASK(SerdesCfg0, TxIdeEnX); + qib_write_kreg(dd, kr_serdes_cfg0, val); + /* be sure chip saw it */ + (void) qib_read_kreg64(dd, kr_scratch); + /* need PLL reset clear for at least 11 usec before lane + * resets cleared; give it a few more to be sure */ + udelay(15); + val &= ~((SYM_MASK(SerdesCfg0, ResetA) | + SYM_MASK(SerdesCfg0, ResetB) | + SYM_MASK(SerdesCfg0, ResetC) | + SYM_MASK(SerdesCfg0, ResetD)) | + SYM_MASK(SerdesCfg0, TxIdeEnX)); + + qib_write_kreg(dd, kr_serdes_cfg0, val); + /* be sure chip saw it */ + (void) qib_read_kreg64(dd, kr_scratch); + + val = qib_read_kreg64(dd, kr_xgxs_cfg); + prev_val = val; + if (val & QLOGIC_IB_XGXS_RESET) + val &= ~QLOGIC_IB_XGXS_RESET; + if (SYM_FIELD(val, XGXSCfg, polarity_inv) != ppd->rx_pol_inv) { + /* need to compensate for Tx inversion in partner */ + val &= ~SYM_MASK(XGXSCfg, polarity_inv); + val |= (u64)ppd->rx_pol_inv << SYM_LSB(XGXSCfg, polarity_inv); + } + if (val != prev_val) + qib_write_kreg(dd, kr_xgxs_cfg, val); + + val = qib_read_kreg64(dd, kr_serdes_cfg0); + + /* clear current and de-emphasis bits */ + config1 &= ~0x0ffffffff00ULL; + /* set current to 20ma */ + config1 |= 0x00000000000ULL; + /* set de-emphasis to -5.68dB */ + config1 |= 0x0cccc000000ULL; + qib_write_kreg(dd, kr_serdes_cfg1, config1); + + /* base and port guid same for single port */ + ppd->guid = dd->base_guid; + + /* + * the process of setting and un-resetting the serdes normally + * causes a serdes PLL error, so check for that and clear it + * here. Also clearr hwerr bit in errstatus, but not others. + */ + hwstat = qib_read_kreg64(dd, kr_hwerrstatus); + if (hwstat) { + /* should just have PLL, clear all set, in an case */ + qib_write_kreg(dd, kr_hwerrclear, hwstat); + qib_write_kreg(dd, kr_errclear, ERR_MASK(HardwareErr)); + } + + dd->control |= QLOGIC_IB_C_LINKENABLE; + dd->control &= ~QLOGIC_IB_C_FREEZEMODE; + qib_write_kreg(dd, kr_control, dd->control); + + return 0; +} + +/** + * qib_6120_quiet_serdes - set serdes to txidle + * @ppd: physical port of the qlogic_ib device + * Called when driver is being unloaded + */ +static void qib_6120_quiet_serdes(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + u64 val; + + qib_set_ib_6120_lstate(ppd, 0, QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + + /* disable IBC */ + dd->control &= ~QLOGIC_IB_C_LINKENABLE; + qib_write_kreg(dd, kr_control, + dd->control | QLOGIC_IB_C_FREEZEMODE); + + if (dd->cspec->ibsymdelta || dd->cspec->iblnkerrdelta || + dd->cspec->ibdeltainprog) { + u64 diagc; + + /* enable counter writes */ + diagc = qib_read_kreg64(dd, kr_hwdiagctrl); + qib_write_kreg(dd, kr_hwdiagctrl, + diagc | SYM_MASK(HwDiagCtrl, CounterWrEnable)); + + if (dd->cspec->ibsymdelta || dd->cspec->ibdeltainprog) { + val = read_6120_creg32(dd, cr_ibsymbolerr); + if (dd->cspec->ibdeltainprog) + val -= val - dd->cspec->ibsymsnap; + val -= dd->cspec->ibsymdelta; + write_6120_creg(dd, cr_ibsymbolerr, val); + } + if (dd->cspec->iblnkerrdelta || dd->cspec->ibdeltainprog) { + val = read_6120_creg32(dd, cr_iblinkerrrecov); + if (dd->cspec->ibdeltainprog) + val -= val - dd->cspec->iblnkerrsnap; + val -= dd->cspec->iblnkerrdelta; + write_6120_creg(dd, cr_iblinkerrrecov, val); + } + + /* and disable counter writes */ + qib_write_kreg(dd, kr_hwdiagctrl, diagc); + } + + val = qib_read_kreg64(dd, kr_serdes_cfg0); + val |= SYM_MASK(SerdesCfg0, TxIdeEnX); + qib_write_kreg(dd, kr_serdes_cfg0, val); +} + +/** + * qib_6120_setup_setextled - set the state of the two external LEDs + * @dd: the qlogic_ib device + * @on: whether the link is up or not + * + * The exact combo of LEDs if on is true is determined by looking + * at the ibcstatus. + + * These LEDs indicate the physical and logical state of IB link. + * For this chip (at least with recommended board pinouts), LED1 + * is Yellow (logical state) and LED2 is Green (physical state), + * + * Note: We try to match the Mellanox HCA LED behavior as best + * we can. Green indicates physical link state is OK (something is + * plugged in, and we can train). + * Amber indicates the link is logically up (ACTIVE). + * Mellanox further blinks the amber LED to indicate data packet + * activity, but we have no hardware support for that, so it would + * require waking up every 10-20 msecs and checking the counters + * on the chip, and then turning the LED off if appropriate. That's + * visible overhead, so not something we will do. + * + */ +static void qib_6120_setup_setextled(struct qib_pportdata *ppd, u32 on) +{ + u64 extctl, val, lst, ltst; + unsigned long flags; + struct qib_devdata *dd = ppd->dd; + + /* + * The diags use the LED to indicate diag info, so we leave + * the external LED alone when the diags are running. + */ + if (dd->diag_client) + return; + + /* Allow override of LED display for, e.g. Locating system in rack */ + if (ppd->led_override) { + ltst = (ppd->led_override & QIB_LED_PHYS) ? + IB_PHYSPORTSTATE_LINKUP : IB_PHYSPORTSTATE_DISABLED, + lst = (ppd->led_override & QIB_LED_LOG) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; + } else if (on) { + val = qib_read_kreg64(dd, kr_ibcstatus); + ltst = qib_6120_phys_portstate(val); + lst = qib_6120_iblink_state(val); + } else { + ltst = 0; + lst = 0; + } + + spin_lock_irqsave(&dd->cspec->gpio_lock, flags); + extctl = dd->cspec->extctrl & ~(SYM_MASK(EXTCtrl, LEDPriPortGreenOn) | + SYM_MASK(EXTCtrl, LEDPriPortYellowOn)); + + if (ltst == IB_PHYSPORTSTATE_LINKUP) + extctl |= SYM_MASK(EXTCtrl, LEDPriPortYellowOn); + if (lst == IB_PORT_ACTIVE) + extctl |= SYM_MASK(EXTCtrl, LEDPriPortGreenOn); + dd->cspec->extctrl = extctl; + qib_write_kreg(dd, kr_extctrl, extctl); + spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); +} + +static void qib_6120_free_irq(struct qib_devdata *dd) +{ + if (dd->cspec->irq) { + free_irq(dd->cspec->irq, dd); + dd->cspec->irq = 0; + } + qib_nomsi(dd); +} + +/** + * qib_6120_setup_cleanup - clean up any per-chip chip-specific stuff + * @dd: the qlogic_ib device + * + * This is called during driver unload. +*/ +static void qib_6120_setup_cleanup(struct qib_devdata *dd) +{ + qib_6120_free_irq(dd); + kfree(dd->cspec->cntrs); + kfree(dd->cspec->portcntrs); + if (dd->cspec->dummy_hdrq) { + dma_free_coherent(&dd->pcidev->dev, + ALIGN(dd->rcvhdrcnt * + dd->rcvhdrentsize * + sizeof(u32), PAGE_SIZE), + dd->cspec->dummy_hdrq, + dd->cspec->dummy_hdrq_phys); + dd->cspec->dummy_hdrq = NULL; + } +} + +static void qib_wantpiobuf_6120_intr(struct qib_devdata *dd, u32 needint) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->sendctrl_lock, flags); + if (needint) + dd->sendctrl |= SYM_MASK(SendCtrl, PIOIntBufAvail); + else + dd->sendctrl &= ~SYM_MASK(SendCtrl, PIOIntBufAvail); + qib_write_kreg(dd, kr_sendctrl, dd->sendctrl); + qib_write_kreg(dd, kr_scratch, 0ULL); + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); +} + +/* + * handle errors and unusual events first, separate function + * to improve cache hits for fast path interrupt handling + */ +static noinline void unlikely_6120_intr(struct qib_devdata *dd, u64 istat) +{ + if (unlikely(istat & ~QLOGIC_IB_I_BITSEXTANT)) + qib_dev_err(dd, "interrupt with unknown interrupts %Lx set\n", + istat & ~QLOGIC_IB_I_BITSEXTANT); + + if (istat & QLOGIC_IB_I_ERROR) { + u64 estat = 0; + + qib_stats.sps_errints++; + estat = qib_read_kreg64(dd, kr_errstatus); + if (!estat) + qib_devinfo(dd->pcidev, "error interrupt (%Lx), " + "but no error bits set!\n", istat); + handle_6120_errors(dd, estat); + } + + if (istat & QLOGIC_IB_I_GPIO) { + u32 gpiostatus; + u32 to_clear = 0; + + /* + * GPIO_3..5 on IBA6120 Rev2 chips indicate + * errors that we need to count. + */ + gpiostatus = qib_read_kreg32(dd, kr_gpio_status); + /* First the error-counter case. */ + if (gpiostatus & GPIO_ERRINTR_MASK) { + /* want to clear the bits we see asserted. */ + to_clear |= (gpiostatus & GPIO_ERRINTR_MASK); + + /* + * Count appropriately, clear bits out of our copy, + * as they have been "handled". + */ + if (gpiostatus & (1 << GPIO_RXUVL_BIT)) + dd->cspec->rxfc_unsupvl_errs++; + if (gpiostatus & (1 << GPIO_OVRUN_BIT)) + dd->cspec->overrun_thresh_errs++; + if (gpiostatus & (1 << GPIO_LLI_BIT)) + dd->cspec->lli_errs++; + gpiostatus &= ~GPIO_ERRINTR_MASK; + } + if (gpiostatus) { + /* + * Some unexpected bits remain. If they could have + * caused the interrupt, complain and clear. + * To avoid repetition of this condition, also clear + * the mask. It is almost certainly due to error. + */ + const u32 mask = qib_read_kreg32(dd, kr_gpio_mask); + + /* + * Also check that the chip reflects our shadow, + * and report issues, If they caused the interrupt. + * we will suppress by refreshing from the shadow. + */ + if (mask & gpiostatus) { + to_clear |= (gpiostatus & mask); + dd->cspec->gpio_mask &= ~(gpiostatus & mask); + qib_write_kreg(dd, kr_gpio_mask, + dd->cspec->gpio_mask); + } + } + if (to_clear) + qib_write_kreg(dd, kr_gpio_clear, (u64) to_clear); + } +} + +static irqreturn_t qib_6120intr(int irq, void *data) +{ + struct qib_devdata *dd = data; + irqreturn_t ret; + u32 istat, ctxtrbits, rmask, crcs = 0; + unsigned i; + + if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) { + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + ret = IRQ_HANDLED; + goto bail; + } + + istat = qib_read_kreg32(dd, kr_intstatus); + + if (unlikely(!istat)) { + ret = IRQ_NONE; /* not our interrupt, or already handled */ + goto bail; + } + if (unlikely(istat == -1)) { + qib_bad_intrstatus(dd); + /* don't know if it was our interrupt or not */ + ret = IRQ_NONE; + goto bail; + } + + qib_stats.sps_ints++; + if (dd->int_counter != (u32) -1) + dd->int_counter++; + + if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT | + QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR))) + unlikely_6120_intr(dd, istat); + + /* + * Clear the interrupt bits we found set, relatively early, so we + * "know" know the chip will have seen this by the time we process + * the queue, and will re-interrupt if necessary. The processor + * itself won't take the interrupt again until we return. + */ + qib_write_kreg(dd, kr_intclear, istat); + + /* + * Handle kernel receive queues before checking for pio buffers + * available since receives can overflow; piobuf waiters can afford + * a few extra cycles, since they were waiting anyway. + */ + ctxtrbits = istat & + ((QLOGIC_IB_I_RCVAVAIL_MASK << QLOGIC_IB_I_RCVAVAIL_SHIFT) | + (QLOGIC_IB_I_RCVURG_MASK << QLOGIC_IB_I_RCVURG_SHIFT)); + if (ctxtrbits) { + rmask = (1U << QLOGIC_IB_I_RCVAVAIL_SHIFT) | + (1U << QLOGIC_IB_I_RCVURG_SHIFT); + for (i = 0; i < dd->first_user_ctxt; i++) { + if (ctxtrbits & rmask) { + ctxtrbits &= ~rmask; + crcs += qib_kreceive(dd->rcd[i], + &dd->cspec->lli_counter, + NULL); + } + rmask <<= 1; + } + if (crcs) { + u32 cntr = dd->cspec->lli_counter; + cntr += crcs; + if (cntr) { + if (cntr > dd->cspec->lli_thresh) { + dd->cspec->lli_counter = 0; + dd->cspec->lli_errs++; + } else + dd->cspec->lli_counter += cntr; + } + } + + + if (ctxtrbits) { + ctxtrbits = + (ctxtrbits >> QLOGIC_IB_I_RCVAVAIL_SHIFT) | + (ctxtrbits >> QLOGIC_IB_I_RCVURG_SHIFT); + qib_handle_urcv(dd, ctxtrbits); + } + } + + if ((istat & QLOGIC_IB_I_SPIOBUFAVAIL) && (dd->flags & QIB_INITTED)) + qib_ib_piobufavail(dd); + + ret = IRQ_HANDLED; +bail: + return ret; +} + +/* + * Set up our chip-specific interrupt handler + * The interrupt type has already been setup, so + * we just need to do the registration and error checking. + */ +static void qib_setup_6120_interrupt(struct qib_devdata *dd) +{ + /* + * If the chip supports added error indication via GPIO pins, + * enable interrupts on those bits so the interrupt routine + * can count the events. Also set flag so interrupt routine + * can know they are expected. + */ + if (SYM_FIELD(dd->revision, Revision_R, + ChipRevMinor) > 1) { + /* Rev2+ reports extra errors via internal GPIO pins */ + dd->cspec->gpio_mask |= GPIO_ERRINTR_MASK; + qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask); + } + + if (!dd->cspec->irq) + qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " + "work\n"); + else { + int ret; + ret = request_irq(dd->cspec->irq, qib_6120intr, 0, + QIB_DRV_NAME, dd); + if (ret) + qib_dev_err(dd, "Couldn't setup interrupt " + "(irq=%d): %d\n", dd->cspec->irq, + ret); + } +} + +/** + * pe_boardname - fill in the board name + * @dd: the qlogic_ib device + * + * info is based on the board revision register + */ +static void pe_boardname(struct qib_devdata *dd) +{ + char *n; + u32 boardid, namelen; + + boardid = SYM_FIELD(dd->revision, Revision, + BoardID); + + switch (boardid) { + case 2: + n = "InfiniPath_QLE7140"; + break; + default: + qib_dev_err(dd, "Unknown 6120 board with ID %u\n", boardid); + n = "Unknown_InfiniPath_6120"; + break; + } + namelen = strlen(n) + 1; + dd->boardname = kmalloc(namelen, GFP_KERNEL); + if (!dd->boardname) + qib_dev_err(dd, "Failed allocation for board name: %s\n", n); + else + snprintf(dd->boardname, namelen, "%s", n); + + if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2) + qib_dev_err(dd, "Unsupported InfiniPath hardware revision " + "%u.%u!\n", dd->majrev, dd->minrev); + + snprintf(dd->boardversion, sizeof(dd->boardversion), + "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", + QIB_CHIP_VERS_MAJ, QIB_CHIP_VERS_MIN, dd->boardname, + (unsigned)SYM_FIELD(dd->revision, Revision_R, Arch), + dd->majrev, dd->minrev, + (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); + +} + +/* + * This routine sleeps, so it can only be called from user context, not + * from interrupt context. If we need interrupt context, we can split + * it into two routines. + */ +static int qib_6120_setup_reset(struct qib_devdata *dd) +{ + u64 val; + int i; + int ret; + u16 cmdval; + u8 int_line, clinesz; + + qib_pcie_getcmd(dd, &cmdval, &int_line, &clinesz); + + /* Use ERROR so it shows up in logs, etc. */ + qib_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->unit); + + /* no interrupts till re-initted */ + qib_6120_set_intr_state(dd, 0); + + dd->cspec->ibdeltainprog = 0; + dd->cspec->ibsymdelta = 0; + dd->cspec->iblnkerrdelta = 0; + + /* + * Keep chip from being accessed until we are ready. Use + * writeq() directly, to allow the write even though QIB_PRESENT + * isn't set. + */ + dd->flags &= ~(QIB_INITTED | QIB_PRESENT); + dd->int_counter = 0; /* so we check interrupts work again */ + val = dd->control | QLOGIC_IB_C_RESET; + writeq(val, &dd->kregbase[kr_control]); + mb(); /* prevent compiler re-ordering around actual reset */ + + for (i = 1; i <= 5; i++) { + /* + * Allow MBIST, etc. to complete; longer on each retry. + * We sometimes get machine checks from bus timeout if no + * response, so for now, make it *really* long. + */ + msleep(1000 + (1 + i) * 2000); + + qib_pcie_reenable(dd, cmdval, int_line, clinesz); + + /* + * Use readq directly, so we don't need to mark it as PRESENT + * until we get a successful indication that all is well. + */ + val = readq(&dd->kregbase[kr_revision]); + if (val == dd->revision) { + dd->flags |= QIB_PRESENT; /* it's back */ + ret = qib_reinit_intr(dd); + goto bail; + } + } + ret = 0; /* failed */ + +bail: + if (ret) { + if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) + qib_dev_err(dd, "Reset failed to setup PCIe or " + "interrupts; continuing anyway\n"); + /* clear the reset error, init error/hwerror mask */ + qib_6120_init_hwerrors(dd); + /* for Rev2 error interrupts; nop for rev 1 */ + qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask); + /* clear the reset error, init error/hwerror mask */ + qib_6120_init_hwerrors(dd); + } + return ret; +} + +/** + * qib_6120_put_tid - write a TID in chip + * @dd: the qlogic_ib device + * @tidptr: pointer to the expected TID (in chip) to update + * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) + * for expected + * @pa: physical address of in memory buffer; tidinvalid if freeing + * + * This exists as a separate routine to allow for special locking etc. + * It's used for both the full cleanup on exit, as well as the normal + * setup and teardown. + */ +static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, + u32 type, unsigned long pa) +{ + u32 __iomem *tidp32 = (u32 __iomem *)tidptr; + unsigned long flags; + int tidx; + spinlock_t *tidlockp; /* select appropriate spinlock */ + + if (!dd->kregbase) + return; + + if (pa != dd->tidinvalid) { + if (pa & ((1U << 11) - 1)) { + qib_dev_err(dd, "Physaddr %lx not 2KB aligned!\n", + pa); + return; + } + pa >>= 11; + if (pa & ~QLOGIC_IB_RT_ADDR_MASK) { + qib_dev_err(dd, "Physical page address 0x%lx " + "larger than supported\n", pa); + return; + } + + if (type == RCVHQ_RCV_TYPE_EAGER) + pa |= dd->tidtemplate; + else /* for now, always full 4KB page */ + pa |= 2 << 29; + } + + /* + * Avoid chip issue by writing the scratch register + * before and after the TID, and with an io write barrier. + * We use a spinlock around the writes, so they can't intermix + * with other TID (eager or expected) writes (the chip problem + * is triggered by back to back TID writes). Unfortunately, this + * call can be done from interrupt level for the ctxt 0 eager TIDs, + * so we have to use irqsave locks. + */ + /* + * Assumes tidptr always > egrtidbase + * if type == RCVHQ_RCV_TYPE_EAGER. + */ + tidx = tidptr - dd->egrtidbase; + + tidlockp = (type == RCVHQ_RCV_TYPE_EAGER && tidx < dd->rcvhdrcnt) + ? &dd->cspec->kernel_tid_lock : &dd->cspec->user_tid_lock; + spin_lock_irqsave(tidlockp, flags); + qib_write_kreg(dd, kr_scratch, 0xfeeddeaf); + writel(pa, tidp32); + qib_write_kreg(dd, kr_scratch, 0xdeadbeef); + mmiowb(); + spin_unlock_irqrestore(tidlockp, flags); +} + +/** + * qib_6120_put_tid_2 - write a TID in chip, Revision 2 or higher + * @dd: the qlogic_ib device + * @tidptr: pointer to the expected TID (in chip) to update + * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) + * for expected + * @pa: physical address of in memory buffer; tidinvalid if freeing + * + * This exists as a separate routine to allow for selection of the + * appropriate "flavor". The static calls in cleanup just use the + * revision-agnostic form, as they are not performance critical. + */ +static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr, + u32 type, unsigned long pa) +{ + u32 __iomem *tidp32 = (u32 __iomem *)tidptr; + u32 tidx; + + if (!dd->kregbase) + return; + + if (pa != dd->tidinvalid) { + if (pa & ((1U << 11) - 1)) { + qib_dev_err(dd, "Physaddr %lx not 2KB aligned!\n", + pa); + return; + } + pa >>= 11; + if (pa & ~QLOGIC_IB_RT_ADDR_MASK) { + qib_dev_err(dd, "Physical page address 0x%lx " + "larger than supported\n", pa); + return; + } + + if (type == RCVHQ_RCV_TYPE_EAGER) + pa |= dd->tidtemplate; + else /* for now, always full 4KB page */ + pa |= 2 << 29; + } + tidx = tidptr - dd->egrtidbase; + writel(pa, tidp32); + mmiowb(); +} + + +/** + * qib_6120_clear_tids - clear all TID entries for a context, expected and eager + * @dd: the qlogic_ib device + * @ctxt: the context + * + * clear all TID entries for a context, expected and eager. + * Used from qib_close(). On this chip, TIDs are only 32 bits, + * not 64, but they are still on 64 bit boundaries, so tidbase + * is declared as u64 * for the pointer math, even though we write 32 bits + */ +static void qib_6120_clear_tids(struct qib_devdata *dd, + struct qib_ctxtdata *rcd) +{ + u64 __iomem *tidbase; + unsigned long tidinv; + u32 ctxt; + int i; + + if (!dd->kregbase || !rcd) + return; + + ctxt = rcd->ctxt; + + tidinv = dd->tidinvalid; + tidbase = (u64 __iomem *) + ((char __iomem *)(dd->kregbase) + + dd->rcvtidbase + + ctxt * dd->rcvtidcnt * sizeof(*tidbase)); + + for (i = 0; i < dd->rcvtidcnt; i++) + /* use func pointer because could be one of two funcs */ + dd->f_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED, + tidinv); + + tidbase = (u64 __iomem *) + ((char __iomem *)(dd->kregbase) + + dd->rcvegrbase + + rcd->rcvegr_tid_base * sizeof(*tidbase)); + + for (i = 0; i < rcd->rcvegrcnt; i++) + /* use func pointer because could be one of two funcs */ + dd->f_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER, + tidinv); +} + +/** + * qib_6120_tidtemplate - setup constants for TID updates + * @dd: the qlogic_ib device + * + * We setup stuff that we use a lot, to avoid calculating each time + */ +static void qib_6120_tidtemplate(struct qib_devdata *dd) +{ + u32 egrsize = dd->rcvegrbufsize; + + /* + * For now, we always allocate 4KB buffers (at init) so we can + * receive max size packets. We may want a module parameter to + * specify 2KB or 4KB and/or make be per ctxt instead of per device + * for those who want to reduce memory footprint. Note that the + * rcvhdrentsize size must be large enough to hold the largest + * IB header (currently 96 bytes) that we expect to handle (plus of + * course the 2 dwords of RHF). + */ + if (egrsize == 2048) + dd->tidtemplate = 1U << 29; + else if (egrsize == 4096) + dd->tidtemplate = 2U << 29; + dd->tidinvalid = 0; +} + +int __attribute__((weak)) qib_unordered_wc(void) +{ + return 0; +} + +/** + * qib_6120_get_base_info - set chip-specific flags for user code + * @rcd: the qlogic_ib ctxt + * @kbase: qib_base_info pointer + * + * We set the PCIE flag because the lower bandwidth on PCIe vs + * HyperTransport can affect some user packet algorithms. + */ +static int qib_6120_get_base_info(struct qib_ctxtdata *rcd, + struct qib_base_info *kinfo) +{ + if (qib_unordered_wc()) + kinfo->spi_runtime_flags |= QIB_RUNTIME_FORCE_WC_ORDER; + + kinfo->spi_runtime_flags |= QIB_RUNTIME_PCIE | + QIB_RUNTIME_FORCE_PIOAVAIL | QIB_RUNTIME_PIO_REGSWAPPED; + return 0; +} + + +static struct qib_message_header * +qib_6120_get_msgheader(struct qib_devdata *dd, __le32 *rhf_addr) +{ + return (struct qib_message_header *) + &rhf_addr[sizeof(u64) / sizeof(u32)]; +} + +static void qib_6120_config_ctxts(struct qib_devdata *dd) +{ + dd->ctxtcnt = qib_read_kreg32(dd, kr_portcnt); + if (qib_n_krcv_queues > 1) { + dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports; + if (dd->first_user_ctxt > dd->ctxtcnt) + dd->first_user_ctxt = dd->ctxtcnt; + dd->qpn_mask = dd->first_user_ctxt <= 2 ? 2 : 6; + } else + dd->first_user_ctxt = dd->num_pports; + dd->n_krcv_queues = dd->first_user_ctxt; +} + +static void qib_update_6120_usrhead(struct qib_ctxtdata *rcd, u64 hd, + u32 updegr, u32 egrhd, u32 npkts) +{ + if (updegr) + qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt); + mmiowb(); + qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); + mmiowb(); +} + +static u32 qib_6120_hdrqempty(struct qib_ctxtdata *rcd) +{ + u32 head, tail; + + head = qib_read_ureg32(rcd->dd, ur_rcvhdrhead, rcd->ctxt); + if (rcd->rcvhdrtail_kvaddr) + tail = qib_get_rcvhdrtail(rcd); + else + tail = qib_read_ureg32(rcd->dd, ur_rcvhdrtail, rcd->ctxt); + return head == tail; +} + +/* + * Used when we close any ctxt, for DMA already in flight + * at close. Can't be done until we know hdrq size, so not + * early in chip init. + */ +static void alloc_dummy_hdrq(struct qib_devdata *dd) +{ + dd->cspec->dummy_hdrq = dma_alloc_coherent(&dd->pcidev->dev, + dd->rcd[0]->rcvhdrq_size, + &dd->cspec->dummy_hdrq_phys, + GFP_ATOMIC | __GFP_COMP); + if (!dd->cspec->dummy_hdrq) { + qib_devinfo(dd->pcidev, "Couldn't allocate dummy hdrq\n"); + /* fallback to just 0'ing */ + dd->cspec->dummy_hdrq_phys = 0UL; + } +} + +/* + * Modify the RCVCTRL register in chip-specific way. This + * is a function because bit positions and (future) register + * location is chip-specific, but the needed operations are + * generic. is a bit-mask because we often want to + * do multiple modifications. + */ +static void rcvctrl_6120_mod(struct qib_pportdata *ppd, unsigned int op, + int ctxt) +{ + struct qib_devdata *dd = ppd->dd; + u64 mask, val; + unsigned long flags; + + spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags); + + if (op & QIB_RCVCTRL_TAILUPD_ENB) + dd->rcvctrl |= (1ULL << QLOGIC_IB_R_TAILUPD_SHIFT); + if (op & QIB_RCVCTRL_TAILUPD_DIS) + dd->rcvctrl &= ~(1ULL << QLOGIC_IB_R_TAILUPD_SHIFT); + if (op & QIB_RCVCTRL_PKEY_ENB) + dd->rcvctrl &= ~(1ULL << IBA6120_R_PKEY_DIS_SHIFT); + if (op & QIB_RCVCTRL_PKEY_DIS) + dd->rcvctrl |= (1ULL << IBA6120_R_PKEY_DIS_SHIFT); + if (ctxt < 0) + mask = (1ULL << dd->ctxtcnt) - 1; + else + mask = (1ULL << ctxt); + if (op & QIB_RCVCTRL_CTXT_ENB) { + /* always done for specific ctxt */ + dd->rcvctrl |= (mask << SYM_LSB(RcvCtrl, PortEnable)); + if (!(dd->flags & QIB_NODMA_RTAIL)) + dd->rcvctrl |= 1ULL << QLOGIC_IB_R_TAILUPD_SHIFT; + /* Write these registers before the context is enabled. */ + qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr, ctxt, + dd->rcd[ctxt]->rcvhdrqtailaddr_phys); + qib_write_kreg_ctxt(dd, kr_rcvhdraddr, ctxt, + dd->rcd[ctxt]->rcvhdrq_phys); + + if (ctxt == 0 && !dd->cspec->dummy_hdrq) + alloc_dummy_hdrq(dd); + } + if (op & QIB_RCVCTRL_CTXT_DIS) + dd->rcvctrl &= ~(mask << SYM_LSB(RcvCtrl, PortEnable)); + if (op & QIB_RCVCTRL_INTRAVAIL_ENB) + dd->rcvctrl |= (mask << QLOGIC_IB_R_INTRAVAIL_SHIFT); + if (op & QIB_RCVCTRL_INTRAVAIL_DIS) + dd->rcvctrl &= ~(mask << QLOGIC_IB_R_INTRAVAIL_SHIFT); + qib_write_kreg(dd, kr_rcvctrl, dd->rcvctrl); + if ((op & QIB_RCVCTRL_INTRAVAIL_ENB) && dd->rhdrhead_intr_off) { + /* arm rcv interrupt */ + val = qib_read_ureg32(dd, ur_rcvhdrhead, ctxt) | + dd->rhdrhead_intr_off; + qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt); + } + if (op & QIB_RCVCTRL_CTXT_ENB) { + /* + * Init the context registers also; if we were + * disabled, tail and head should both be zero + * already from the enable, but since we don't + * know, we have to do it explicitly. + */ + val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt); + qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt); + + val = qib_read_ureg32(dd, ur_rcvhdrtail, ctxt); + dd->rcd[ctxt]->head = val; + /* If kctxt, interrupt on next receive. */ + if (ctxt < dd->first_user_ctxt) + val |= dd->rhdrhead_intr_off; + qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt); + } + if (op & QIB_RCVCTRL_CTXT_DIS) { + /* + * Be paranoid, and never write 0's to these, just use an + * unused page. Of course, + * rcvhdraddr points to a large chunk of memory, so this + * could still trash things, but at least it won't trash + * page 0, and by disabling the ctxt, it should stop "soon", + * even if a packet or two is in already in flight after we + * disabled the ctxt. Only 6120 has this issue. + */ + if (ctxt >= 0) { + qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr, ctxt, + dd->cspec->dummy_hdrq_phys); + qib_write_kreg_ctxt(dd, kr_rcvhdraddr, ctxt, + dd->cspec->dummy_hdrq_phys); + } else { + unsigned i; + + for (i = 0; i < dd->cfgctxts; i++) { + qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr, + i, dd->cspec->dummy_hdrq_phys); + qib_write_kreg_ctxt(dd, kr_rcvhdraddr, + i, dd->cspec->dummy_hdrq_phys); + } + } + } + spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags); +} + +/* + * Modify the SENDCTRL register in chip-specific way. This + * is a function there may be multiple such registers with + * slightly different layouts. Only operations actually used + * are implemented yet. + * Chip requires no back-back sendctrl writes, so write + * scratch register after writing sendctrl + */ +static void sendctrl_6120_mod(struct qib_pportdata *ppd, u32 op) +{ + struct qib_devdata *dd = ppd->dd; + u64 tmp_dd_sendctrl; + unsigned long flags; + + spin_lock_irqsave(&dd->sendctrl_lock, flags); + + /* First the ones that are "sticky", saved in shadow */ + if (op & QIB_SENDCTRL_CLEAR) + dd->sendctrl = 0; + if (op & QIB_SENDCTRL_SEND_DIS) + dd->sendctrl &= ~SYM_MASK(SendCtrl, PIOEnable); + else if (op & QIB_SENDCTRL_SEND_ENB) + dd->sendctrl |= SYM_MASK(SendCtrl, PIOEnable); + if (op & QIB_SENDCTRL_AVAIL_DIS) + dd->sendctrl &= ~SYM_MASK(SendCtrl, PIOBufAvailUpd); + else if (op & QIB_SENDCTRL_AVAIL_ENB) + dd->sendctrl |= SYM_MASK(SendCtrl, PIOBufAvailUpd); + + if (op & QIB_SENDCTRL_DISARM_ALL) { + u32 i, last; + + tmp_dd_sendctrl = dd->sendctrl; + /* + * disarm any that are not yet launched, disabling sends + * and updates until done. + */ + last = dd->piobcnt2k + dd->piobcnt4k; + tmp_dd_sendctrl &= + ~(SYM_MASK(SendCtrl, PIOEnable) | + SYM_MASK(SendCtrl, PIOBufAvailUpd)); + for (i = 0; i < last; i++) { + qib_write_kreg(dd, kr_sendctrl, tmp_dd_sendctrl | + SYM_MASK(SendCtrl, Disarm) | i); + qib_write_kreg(dd, kr_scratch, 0); + } + } + + tmp_dd_sendctrl = dd->sendctrl; + + if (op & QIB_SENDCTRL_FLUSH) + tmp_dd_sendctrl |= SYM_MASK(SendCtrl, Abort); + if (op & QIB_SENDCTRL_DISARM) + tmp_dd_sendctrl |= SYM_MASK(SendCtrl, Disarm) | + ((op & QIB_6120_SendCtrl_DisarmPIOBuf_RMASK) << + SYM_LSB(SendCtrl, DisarmPIOBuf)); + if (op & QIB_SENDCTRL_AVAIL_BLIP) + tmp_dd_sendctrl &= ~SYM_MASK(SendCtrl, PIOBufAvailUpd); + + qib_write_kreg(dd, kr_sendctrl, tmp_dd_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + + if (op & QIB_SENDCTRL_AVAIL_BLIP) { + qib_write_kreg(dd, kr_sendctrl, dd->sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + + if (op & QIB_SENDCTRL_FLUSH) { + u32 v; + /* + * ensure writes have hit chip, then do a few + * more reads, to allow DMA of pioavail registers + * to occur, so in-memory copy is in sync with + * the chip. Not always safe to sleep. + */ + v = qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_scratch, v); + v = qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_scratch, v); + qib_read_kreg32(dd, kr_scratch); + } +} + +/** + * qib_portcntr_6120 - read a per-port counter + * @dd: the qlogic_ib device + * @creg: the counter to snapshot + */ +static u64 qib_portcntr_6120(struct qib_pportdata *ppd, u32 reg) +{ + u64 ret = 0ULL; + struct qib_devdata *dd = ppd->dd; + u16 creg; + /* 0xffff for unimplemented or synthesized counters */ + static const u16 xlator[] = { + [QIBPORTCNTR_PKTSEND] = cr_pktsend, + [QIBPORTCNTR_WORDSEND] = cr_wordsend, + [QIBPORTCNTR_PSXMITDATA] = 0xffff, + [QIBPORTCNTR_PSXMITPKTS] = 0xffff, + [QIBPORTCNTR_PSXMITWAIT] = 0xffff, + [QIBPORTCNTR_SENDSTALL] = cr_sendstall, + [QIBPORTCNTR_PKTRCV] = cr_pktrcv, + [QIBPORTCNTR_PSRCVDATA] = 0xffff, + [QIBPORTCNTR_PSRCVPKTS] = 0xffff, + [QIBPORTCNTR_RCVEBP] = cr_rcvebp, + [QIBPORTCNTR_RCVOVFL] = cr_rcvovfl, + [QIBPORTCNTR_WORDRCV] = cr_wordrcv, + [QIBPORTCNTR_RXDROPPKT] = cr_rxdroppkt, + [QIBPORTCNTR_RXLOCALPHYERR] = 0xffff, + [QIBPORTCNTR_RXVLERR] = 0xffff, + [QIBPORTCNTR_ERRICRC] = cr_erricrc, + [QIBPORTCNTR_ERRVCRC] = cr_errvcrc, + [QIBPORTCNTR_ERRLPCRC] = cr_errlpcrc, + [QIBPORTCNTR_BADFORMAT] = cr_badformat, + [QIBPORTCNTR_ERR_RLEN] = cr_err_rlen, + [QIBPORTCNTR_IBSYMBOLERR] = cr_ibsymbolerr, + [QIBPORTCNTR_INVALIDRLEN] = cr_invalidrlen, + [QIBPORTCNTR_UNSUPVL] = cr_txunsupvl, + [QIBPORTCNTR_EXCESSBUFOVFL] = 0xffff, + [QIBPORTCNTR_ERRLINK] = cr_errlink, + [QIBPORTCNTR_IBLINKDOWN] = cr_iblinkdown, + [QIBPORTCNTR_IBLINKERRRECOV] = cr_iblinkerrrecov, + [QIBPORTCNTR_LLI] = 0xffff, + [QIBPORTCNTR_PSINTERVAL] = 0xffff, + [QIBPORTCNTR_PSSTART] = 0xffff, + [QIBPORTCNTR_PSSTAT] = 0xffff, + [QIBPORTCNTR_VL15PKTDROP] = 0xffff, + [QIBPORTCNTR_ERRPKEY] = cr_errpkey, + [QIBPORTCNTR_KHDROVFL] = 0xffff, + }; + + if (reg >= ARRAY_SIZE(xlator)) { + qib_devinfo(ppd->dd->pcidev, + "Unimplemented portcounter %u\n", reg); + goto done; + } + creg = xlator[reg]; + + /* handle counters requests not implemented as chip counters */ + if (reg == QIBPORTCNTR_LLI) + ret = dd->cspec->lli_errs; + else if (reg == QIBPORTCNTR_EXCESSBUFOVFL) + ret = dd->cspec->overrun_thresh_errs; + else if (reg == QIBPORTCNTR_KHDROVFL) { + int i; + + /* sum over all kernel contexts */ + for (i = 0; i < dd->first_user_ctxt; i++) + ret += read_6120_creg32(dd, cr_portovfl + i); + } else if (reg == QIBPORTCNTR_PSSTAT) + ret = dd->cspec->pma_sample_status; + if (creg == 0xffff) + goto done; + + /* + * only fast incrementing counters are 64bit; use 32 bit reads to + * avoid two independent reads when on opteron + */ + if (creg == cr_wordsend || creg == cr_wordrcv || + creg == cr_pktsend || creg == cr_pktrcv) + ret = read_6120_creg(dd, creg); + else + ret = read_6120_creg32(dd, creg); + if (creg == cr_ibsymbolerr) { + if (dd->cspec->ibdeltainprog) + ret -= ret - dd->cspec->ibsymsnap; + ret -= dd->cspec->ibsymdelta; + } else if (creg == cr_iblinkerrrecov) { + if (dd->cspec->ibdeltainprog) + ret -= ret - dd->cspec->iblnkerrsnap; + ret -= dd->cspec->iblnkerrdelta; + } + if (reg == QIBPORTCNTR_RXDROPPKT) /* add special cased count */ + ret += dd->cspec->rxfc_unsupvl_errs; + +done: + return ret; +} + +/* + * Device counter names (not port-specific), one line per stat, + * single string. Used by utilities like ipathstats to print the stats + * in a way which works for different versions of drivers, without changing + * the utility. Names need to be 12 chars or less (w/o newline), for proper + * display by utility. + * Non-error counters are first. + * Start of "error" conters is indicated by a leading "E " on the first + * "error" counter, and doesn't count in label length. + * The EgrOvfl list needs to be last so we truncate them at the configured + * context count for the device. + * cntr6120indices contains the corresponding register indices. + */ +static const char cntr6120names[] = + "Interrupts\n" + "HostBusStall\n" + "E RxTIDFull\n" + "RxTIDInvalid\n" + "Ctxt0EgrOvfl\n" + "Ctxt1EgrOvfl\n" + "Ctxt2EgrOvfl\n" + "Ctxt3EgrOvfl\n" + "Ctxt4EgrOvfl\n"; + +static const size_t cntr6120indices[] = { + cr_lbint, + cr_lbflowstall, + cr_errtidfull, + cr_errtidvalid, + cr_portovfl + 0, + cr_portovfl + 1, + cr_portovfl + 2, + cr_portovfl + 3, + cr_portovfl + 4, +}; + +/* + * same as cntr6120names and cntr6120indices, but for port-specific counters. + * portcntr6120indices is somewhat complicated by some registers needing + * adjustments of various kinds, and those are ORed with _PORT_VIRT_FLAG + */ +static const char portcntr6120names[] = + "TxPkt\n" + "TxFlowPkt\n" + "TxWords\n" + "RxPkt\n" + "RxFlowPkt\n" + "RxWords\n" + "TxFlowStall\n" + "E IBStatusChng\n" + "IBLinkDown\n" + "IBLnkRecov\n" + "IBRxLinkErr\n" + "IBSymbolErr\n" + "RxLLIErr\n" + "RxBadFormat\n" + "RxBadLen\n" + "RxBufOvrfl\n" + "RxEBP\n" + "RxFlowCtlErr\n" + "RxICRCerr\n" + "RxLPCRCerr\n" + "RxVCRCerr\n" + "RxInvalLen\n" + "RxInvalPKey\n" + "RxPktDropped\n" + "TxBadLength\n" + "TxDropped\n" + "TxInvalLen\n" + "TxUnderrun\n" + "TxUnsupVL\n" + ; + +#define _PORT_VIRT_FLAG 0x8000 /* "virtual", need adjustments */ +static const size_t portcntr6120indices[] = { + QIBPORTCNTR_PKTSEND | _PORT_VIRT_FLAG, + cr_pktsendflow, + QIBPORTCNTR_WORDSEND | _PORT_VIRT_FLAG, + QIBPORTCNTR_PKTRCV | _PORT_VIRT_FLAG, + cr_pktrcvflowctrl, + QIBPORTCNTR_WORDRCV | _PORT_VIRT_FLAG, + QIBPORTCNTR_SENDSTALL | _PORT_VIRT_FLAG, + cr_ibstatuschange, + QIBPORTCNTR_IBLINKDOWN | _PORT_VIRT_FLAG, + QIBPORTCNTR_IBLINKERRRECOV | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRLINK | _PORT_VIRT_FLAG, + QIBPORTCNTR_IBSYMBOLERR | _PORT_VIRT_FLAG, + QIBPORTCNTR_LLI | _PORT_VIRT_FLAG, + QIBPORTCNTR_BADFORMAT | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERR_RLEN | _PORT_VIRT_FLAG, + QIBPORTCNTR_RCVOVFL | _PORT_VIRT_FLAG, + QIBPORTCNTR_RCVEBP | _PORT_VIRT_FLAG, + cr_rcvflowctrl_err, + QIBPORTCNTR_ERRICRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRLPCRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRVCRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_INVALIDRLEN | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRPKEY | _PORT_VIRT_FLAG, + QIBPORTCNTR_RXDROPPKT | _PORT_VIRT_FLAG, + cr_invalidslen, + cr_senddropped, + cr_errslen, + cr_sendunderrun, + cr_txunsupvl, +}; + +/* do all the setup to make the counter reads efficient later */ +static void init_6120_cntrnames(struct qib_devdata *dd) +{ + int i, j = 0; + char *s; + + for (i = 0, s = (char *)cntr6120names; s && j <= dd->cfgctxts; + i++) { + /* we always have at least one counter before the egrovfl */ + if (!j && !strncmp("Ctxt0EgrOvfl", s + 1, 12)) + j = 1; + s = strchr(s + 1, '\n'); + if (s && j) + j++; + } + dd->cspec->ncntrs = i; + if (!s) + /* full list; size is without terminating null */ + dd->cspec->cntrnamelen = sizeof(cntr6120names) - 1; + else + dd->cspec->cntrnamelen = 1 + s - cntr6120names; + dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs + * sizeof(u64), GFP_KERNEL); + if (!dd->cspec->cntrs) + qib_dev_err(dd, "Failed allocation for counters\n"); + + for (i = 0, s = (char *)portcntr6120names; s; i++) + s = strchr(s + 1, '\n'); + dd->cspec->nportcntrs = i - 1; + dd->cspec->portcntrnamelen = sizeof(portcntr6120names) - 1; + dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs + * sizeof(u64), GFP_KERNEL); + if (!dd->cspec->portcntrs) + qib_dev_err(dd, "Failed allocation for portcounters\n"); +} + +static u32 qib_read_6120cntrs(struct qib_devdata *dd, loff_t pos, char **namep, + u64 **cntrp) +{ + u32 ret; + + if (namep) { + ret = dd->cspec->cntrnamelen; + if (pos >= ret) + ret = 0; /* final read after getting everything */ + else + *namep = (char *)cntr6120names; + } else { + u64 *cntr = dd->cspec->cntrs; + int i; + + ret = dd->cspec->ncntrs * sizeof(u64); + if (!cntr || pos >= ret) { + /* everything read, or couldn't get memory */ + ret = 0; + goto done; + } + if (pos >= ret) { + ret = 0; /* final read after getting everything */ + goto done; + } + *cntrp = cntr; + for (i = 0; i < dd->cspec->ncntrs; i++) + *cntr++ = read_6120_creg32(dd, cntr6120indices[i]); + } +done: + return ret; +} + +static u32 qib_read_6120portcntrs(struct qib_devdata *dd, loff_t pos, u32 port, + char **namep, u64 **cntrp) +{ + u32 ret; + + if (namep) { + ret = dd->cspec->portcntrnamelen; + if (pos >= ret) + ret = 0; /* final read after getting everything */ + else + *namep = (char *)portcntr6120names; + } else { + u64 *cntr = dd->cspec->portcntrs; + struct qib_pportdata *ppd = &dd->pport[port]; + int i; + + ret = dd->cspec->nportcntrs * sizeof(u64); + if (!cntr || pos >= ret) { + /* everything read, or couldn't get memory */ + ret = 0; + goto done; + } + *cntrp = cntr; + for (i = 0; i < dd->cspec->nportcntrs; i++) { + if (portcntr6120indices[i] & _PORT_VIRT_FLAG) + *cntr++ = qib_portcntr_6120(ppd, + portcntr6120indices[i] & + ~_PORT_VIRT_FLAG); + else + *cntr++ = read_6120_creg32(dd, + portcntr6120indices[i]); + } + } +done: + return ret; +} + +static void qib_chk_6120_errormask(struct qib_devdata *dd) +{ + static u32 fixed; + u32 ctrl; + unsigned long errormask; + unsigned long hwerrs; + + if (!dd->cspec->errormask || !(dd->flags & QIB_INITTED)) + return; + + errormask = qib_read_kreg64(dd, kr_errmask); + + if (errormask == dd->cspec->errormask) + return; + fixed++; + + hwerrs = qib_read_kreg64(dd, kr_hwerrstatus); + ctrl = qib_read_kreg32(dd, kr_control); + + qib_write_kreg(dd, kr_errmask, + dd->cspec->errormask); + + if ((hwerrs & dd->cspec->hwerrmask) || + (ctrl & QLOGIC_IB_C_FREEZEMODE)) { + qib_write_kreg(dd, kr_hwerrclear, 0ULL); + qib_write_kreg(dd, kr_errclear, 0ULL); + /* force re-interrupt of pending events, just in case */ + qib_write_kreg(dd, kr_intclear, 0ULL); + qib_devinfo(dd->pcidev, + "errormask fixed(%u) %lx->%lx, ctrl %x hwerr %lx\n", + fixed, errormask, (unsigned long)dd->cspec->errormask, + ctrl, hwerrs); + } +} + +/** + * qib_get_faststats - get word counters from chip before they overflow + * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * + * This needs more work; in particular, decision on whether we really + * need traffic_wds done the way it is + * called from add_timer + */ +static void qib_get_6120_faststats(unsigned long opaque) +{ + struct qib_devdata *dd = (struct qib_devdata *) opaque; + struct qib_pportdata *ppd = dd->pport; + unsigned long flags; + u64 traffic_wds; + + /* + * don't access the chip while running diags, or memory diags can + * fail + */ + if (!(dd->flags & QIB_INITTED) || dd->diag_client) + /* but re-arm the timer, for diags case; won't hurt other */ + goto done; + + /* + * We now try to maintain an activity timer, based on traffic + * exceeding a threshold, so we need to check the word-counts + * even if they are 64-bit. + */ + traffic_wds = qib_portcntr_6120(ppd, cr_wordsend) + + qib_portcntr_6120(ppd, cr_wordrcv); + spin_lock_irqsave(&dd->eep_st_lock, flags); + traffic_wds -= dd->traffic_wds; + dd->traffic_wds += traffic_wds; + if (traffic_wds >= QIB_TRAFFIC_ACTIVE_THRESHOLD) + atomic_add(5, &dd->active_time); /* S/B #define */ + spin_unlock_irqrestore(&dd->eep_st_lock, flags); + + qib_chk_6120_errormask(dd); +done: + mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER); +} + +/* no interrupt fallback for these chips */ +static int qib_6120_nointr_fallback(struct qib_devdata *dd) +{ + return 0; +} + +/* + * reset the XGXS (between serdes and IBC). Slightly less intrusive + * than resetting the IBC or external link state, and useful in some + * cases to cause some retraining. To do this right, we reset IBC + * as well. + */ +static void qib_6120_xgxs_reset(struct qib_pportdata *ppd) +{ + u64 val, prev_val; + struct qib_devdata *dd = ppd->dd; + + prev_val = qib_read_kreg64(dd, kr_xgxs_cfg); + val = prev_val | QLOGIC_IB_XGXS_RESET; + prev_val &= ~QLOGIC_IB_XGXS_RESET; /* be sure */ + qib_write_kreg(dd, kr_control, + dd->control & ~QLOGIC_IB_C_LINKENABLE); + qib_write_kreg(dd, kr_xgxs_cfg, val); + qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_xgxs_cfg, prev_val); + qib_write_kreg(dd, kr_control, dd->control); +} + +static int qib_6120_get_ib_cfg(struct qib_pportdata *ppd, int which) +{ + int ret; + + switch (which) { + case QIB_IB_CFG_LWID: + ret = ppd->link_width_active; + break; + + case QIB_IB_CFG_SPD: + ret = ppd->link_speed_active; + break; + + case QIB_IB_CFG_LWID_ENB: + ret = ppd->link_width_enabled; + break; + + case QIB_IB_CFG_SPD_ENB: + ret = ppd->link_speed_enabled; + break; + + case QIB_IB_CFG_OP_VLS: + ret = ppd->vls_operational; + break; + + case QIB_IB_CFG_VL_HIGH_CAP: + ret = 0; + break; + + case QIB_IB_CFG_VL_LOW_CAP: + ret = 0; + break; + + case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */ + ret = SYM_FIELD(ppd->dd->cspec->ibcctrl, IBCCtrl, + OverrunThreshold); + break; + + case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */ + ret = SYM_FIELD(ppd->dd->cspec->ibcctrl, IBCCtrl, + PhyerrThreshold); + break; + + case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */ + /* will only take effect when the link state changes */ + ret = (ppd->dd->cspec->ibcctrl & + SYM_MASK(IBCCtrl, LinkDownDefaultState)) ? + IB_LINKINITCMD_SLEEP : IB_LINKINITCMD_POLL; + break; + + case QIB_IB_CFG_HRTBT: /* Get Heartbeat off/enable/auto */ + ret = 0; /* no heartbeat on this chip */ + break; + + case QIB_IB_CFG_PMA_TICKS: + ret = 250; /* 1 usec. */ + break; + + default: + ret = -EINVAL; + break; + } + return ret; +} + +/* + * We assume range checking is already done, if needed. + */ +static int qib_6120_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) +{ + struct qib_devdata *dd = ppd->dd; + int ret = 0; + u64 val64; + u16 lcmd, licmd; + + switch (which) { + case QIB_IB_CFG_LWID_ENB: + ppd->link_width_enabled = val; + break; + + case QIB_IB_CFG_SPD_ENB: + ppd->link_speed_enabled = val; + break; + + case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */ + val64 = SYM_FIELD(dd->cspec->ibcctrl, IBCCtrl, + OverrunThreshold); + if (val64 != val) { + dd->cspec->ibcctrl &= + ~SYM_MASK(IBCCtrl, OverrunThreshold); + dd->cspec->ibcctrl |= (u64) val << + SYM_LSB(IBCCtrl, OverrunThreshold); + qib_write_kreg(dd, kr_ibcctrl, dd->cspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + break; + + case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */ + val64 = SYM_FIELD(dd->cspec->ibcctrl, IBCCtrl, + PhyerrThreshold); + if (val64 != val) { + dd->cspec->ibcctrl &= + ~SYM_MASK(IBCCtrl, PhyerrThreshold); + dd->cspec->ibcctrl |= (u64) val << + SYM_LSB(IBCCtrl, PhyerrThreshold); + qib_write_kreg(dd, kr_ibcctrl, dd->cspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + break; + + case QIB_IB_CFG_PKEYS: /* update pkeys */ + val64 = (u64) ppd->pkeys[0] | ((u64) ppd->pkeys[1] << 16) | + ((u64) ppd->pkeys[2] << 32) | + ((u64) ppd->pkeys[3] << 48); + qib_write_kreg(dd, kr_partitionkey, val64); + break; + + case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */ + /* will only take effect when the link state changes */ + if (val == IB_LINKINITCMD_POLL) + dd->cspec->ibcctrl &= + ~SYM_MASK(IBCCtrl, LinkDownDefaultState); + else /* SLEEP */ + dd->cspec->ibcctrl |= + SYM_MASK(IBCCtrl, LinkDownDefaultState); + qib_write_kreg(dd, kr_ibcctrl, dd->cspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + break; + + case QIB_IB_CFG_MTU: /* update the MTU in IBC */ + /* + * Update our housekeeping variables, and set IBC max + * size, same as init code; max IBC is max we allow in + * buffer, less the qword pbc, plus 1 for ICRC, in dwords + * Set even if it's unchanged, print debug message only + * on changes. + */ + val = (ppd->ibmaxlen >> 2) + 1; + dd->cspec->ibcctrl &= ~SYM_MASK(IBCCtrl, MaxPktLen); + dd->cspec->ibcctrl |= (u64)val << + SYM_LSB(IBCCtrl, MaxPktLen); + qib_write_kreg(dd, kr_ibcctrl, dd->cspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + break; + + case QIB_IB_CFG_LSTATE: /* set the IB link state */ + switch (val & 0xffff0000) { + case IB_LINKCMD_DOWN: + lcmd = QLOGIC_IB_IBCC_LINKCMD_DOWN; + if (!dd->cspec->ibdeltainprog) { + dd->cspec->ibdeltainprog = 1; + dd->cspec->ibsymsnap = + read_6120_creg32(dd, cr_ibsymbolerr); + dd->cspec->iblnkerrsnap = + read_6120_creg32(dd, cr_iblinkerrrecov); + } + break; + + case IB_LINKCMD_ARMED: + lcmd = QLOGIC_IB_IBCC_LINKCMD_ARMED; + break; + + case IB_LINKCMD_ACTIVE: + lcmd = QLOGIC_IB_IBCC_LINKCMD_ACTIVE; + break; + + default: + ret = -EINVAL; + qib_dev_err(dd, "bad linkcmd req 0x%x\n", val >> 16); + goto bail; + } + switch (val & 0xffff) { + case IB_LINKINITCMD_NOP: + licmd = 0; + break; + + case IB_LINKINITCMD_POLL: + licmd = QLOGIC_IB_IBCC_LINKINITCMD_POLL; + break; + + case IB_LINKINITCMD_SLEEP: + licmd = QLOGIC_IB_IBCC_LINKINITCMD_SLEEP; + break; + + case IB_LINKINITCMD_DISABLE: + licmd = QLOGIC_IB_IBCC_LINKINITCMD_DISABLE; + break; + + default: + ret = -EINVAL; + qib_dev_err(dd, "bad linkinitcmd req 0x%x\n", + val & 0xffff); + goto bail; + } + qib_set_ib_6120_lstate(ppd, lcmd, licmd); + goto bail; + + case QIB_IB_CFG_HRTBT: + ret = -EINVAL; + break; + + default: + ret = -EINVAL; + } +bail: + return ret; +} + +static int qib_6120_set_loopback(struct qib_pportdata *ppd, const char *what) +{ + int ret = 0; + if (!strncmp(what, "ibc", 3)) { + ppd->dd->cspec->ibcctrl |= SYM_MASK(IBCCtrl, Loopback); + qib_devinfo(ppd->dd->pcidev, "Enabling IB%u:%u IBC loopback\n", + ppd->dd->unit, ppd->port); + } else if (!strncmp(what, "off", 3)) { + ppd->dd->cspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback); + qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback " + "(normal)\n", ppd->dd->unit, ppd->port); + } else + ret = -EINVAL; + if (!ret) { + qib_write_kreg(ppd->dd, kr_ibcctrl, ppd->dd->cspec->ibcctrl); + qib_write_kreg(ppd->dd, kr_scratch, 0); + } + return ret; +} + +static void pma_6120_timer(unsigned long data) +{ + struct qib_pportdata *ppd = (struct qib_pportdata *)data; + struct qib_chip_specific *cs = ppd->dd->cspec; + struct qib_ibport *ibp = &ppd->ibport_data; + unsigned long flags; + + spin_lock_irqsave(&ibp->lock, flags); + if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED) { + cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING; + qib_snapshot_counters(ppd, &cs->sword, &cs->rword, + &cs->spkts, &cs->rpkts, &cs->xmit_wait); + mod_timer(&cs->pma_timer, + jiffies + usecs_to_jiffies(ibp->pma_sample_interval)); + } else if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) { + u64 ta, tb, tc, td, te; + + cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE; + qib_snapshot_counters(ppd, &ta, &tb, &tc, &td, &te); + + cs->sword = ta - cs->sword; + cs->rword = tb - cs->rword; + cs->spkts = tc - cs->spkts; + cs->rpkts = td - cs->rpkts; + cs->xmit_wait = te - cs->xmit_wait; + } + spin_unlock_irqrestore(&ibp->lock, flags); +} + +/* + * Note that the caller has the ibp->lock held. + */ +static void qib_set_cntr_6120_sample(struct qib_pportdata *ppd, u32 intv, + u32 start) +{ + struct qib_chip_specific *cs = ppd->dd->cspec; + + if (start && intv) { + cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED; + mod_timer(&cs->pma_timer, jiffies + usecs_to_jiffies(start)); + } else if (intv) { + cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING; + qib_snapshot_counters(ppd, &cs->sword, &cs->rword, + &cs->spkts, &cs->rpkts, &cs->xmit_wait); + mod_timer(&cs->pma_timer, jiffies + usecs_to_jiffies(intv)); + } else { + cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE; + cs->sword = 0; + cs->rword = 0; + cs->spkts = 0; + cs->rpkts = 0; + cs->xmit_wait = 0; + } +} + +static u32 qib_6120_iblink_state(u64 ibcs) +{ + u32 state = (u32)SYM_FIELD(ibcs, IBCStatus, LinkState); + + switch (state) { + case IB_6120_L_STATE_INIT: + state = IB_PORT_INIT; + break; + case IB_6120_L_STATE_ARM: + state = IB_PORT_ARMED; + break; + case IB_6120_L_STATE_ACTIVE: + /* fall through */ + case IB_6120_L_STATE_ACT_DEFER: + state = IB_PORT_ACTIVE; + break; + default: /* fall through */ + case IB_6120_L_STATE_DOWN: + state = IB_PORT_DOWN; + break; + } + return state; +} + +/* returns the IBTA port state, rather than the IBC link training state */ +static u8 qib_6120_phys_portstate(u64 ibcs) +{ + u8 state = (u8)SYM_FIELD(ibcs, IBCStatus, LinkTrainingState); + return qib_6120_physportstate[state]; +} + +static int qib_6120_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs) +{ + unsigned long flags; + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_FORCE_NOTIFY; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + + if (ibup) { + if (ppd->dd->cspec->ibdeltainprog) { + ppd->dd->cspec->ibdeltainprog = 0; + ppd->dd->cspec->ibsymdelta += + read_6120_creg32(ppd->dd, cr_ibsymbolerr) - + ppd->dd->cspec->ibsymsnap; + ppd->dd->cspec->iblnkerrdelta += + read_6120_creg32(ppd->dd, cr_iblinkerrrecov) - + ppd->dd->cspec->iblnkerrsnap; + } + qib_hol_init(ppd); + } else { + ppd->dd->cspec->lli_counter = 0; + if (!ppd->dd->cspec->ibdeltainprog) { + ppd->dd->cspec->ibdeltainprog = 1; + ppd->dd->cspec->ibsymsnap = + read_6120_creg32(ppd->dd, cr_ibsymbolerr); + ppd->dd->cspec->iblnkerrsnap = + read_6120_creg32(ppd->dd, cr_iblinkerrrecov); + } + qib_hol_down(ppd); + } + + qib_6120_setup_setextled(ppd, ibup); + + return 0; +} + +/* Does read/modify/write to appropriate registers to + * set output and direction bits selected by mask. + * these are in their canonical postions (e.g. lsb of + * dir will end up in D48 of extctrl on existing chips). + * returns contents of GP Inputs. + */ +static int gpio_6120_mod(struct qib_devdata *dd, u32 out, u32 dir, u32 mask) +{ + u64 read_val, new_out; + unsigned long flags; + + if (mask) { + /* some bits being written, lock access to GPIO */ + dir &= mask; + out &= mask; + spin_lock_irqsave(&dd->cspec->gpio_lock, flags); + dd->cspec->extctrl &= ~((u64)mask << SYM_LSB(EXTCtrl, GPIOOe)); + dd->cspec->extctrl |= ((u64) dir << SYM_LSB(EXTCtrl, GPIOOe)); + new_out = (dd->cspec->gpio_out & ~mask) | out; + + qib_write_kreg(dd, kr_extctrl, dd->cspec->extctrl); + qib_write_kreg(dd, kr_gpio_out, new_out); + dd->cspec->gpio_out = new_out; + spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); + } + /* + * It is unlikely that a read at this time would get valid + * data on a pin whose direction line was set in the same + * call to this function. We include the read here because + * that allows us to potentially combine a change on one pin with + * a read on another, and because the old code did something like + * this. + */ + read_val = qib_read_kreg64(dd, kr_extstatus); + return SYM_FIELD(read_val, EXTStatus, GPIOIn); +} + +/* + * Read fundamental info we need to use the chip. These are + * the registers that describe chip capabilities, and are + * saved in shadow registers. + */ +static void get_6120_chip_params(struct qib_devdata *dd) +{ + u64 val; + u32 piobufs; + int mtu; + + dd->uregbase = qib_read_kreg32(dd, kr_userregbase); + + dd->rcvtidcnt = qib_read_kreg32(dd, kr_rcvtidcnt); + dd->rcvtidbase = qib_read_kreg32(dd, kr_rcvtidbase); + dd->rcvegrbase = qib_read_kreg32(dd, kr_rcvegrbase); + dd->palign = qib_read_kreg32(dd, kr_palign); + dd->piobufbase = qib_read_kreg64(dd, kr_sendpiobufbase); + dd->pio2k_bufbase = dd->piobufbase & 0xffffffff; + + dd->rcvhdrcnt = qib_read_kreg32(dd, kr_rcvegrcnt); + + val = qib_read_kreg64(dd, kr_sendpiosize); + dd->piosize2k = val & ~0U; + dd->piosize4k = val >> 32; + + mtu = ib_mtu_enum_to_int(qib_ibmtu); + if (mtu == -1) + mtu = QIB_DEFAULT_MTU; + dd->pport->ibmtu = (u32)mtu; + + val = qib_read_kreg64(dd, kr_sendpiobufcnt); + dd->piobcnt2k = val & ~0U; + dd->piobcnt4k = val >> 32; + /* these may be adjusted in init_chip_wc_pat() */ + dd->pio2kbase = (u32 __iomem *) + (((char __iomem *)dd->kregbase) + dd->pio2k_bufbase); + if (dd->piobcnt4k) { + dd->pio4kbase = (u32 __iomem *) + (((char __iomem *) dd->kregbase) + + (dd->piobufbase >> 32)); + /* + * 4K buffers take 2 pages; we use roundup just to be + * paranoid; we calculate it once here, rather than on + * ever buf allocate + */ + dd->align4k = ALIGN(dd->piosize4k, dd->palign); + } + + piobufs = dd->piobcnt4k + dd->piobcnt2k; + + dd->pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) / + (sizeof(u64) * BITS_PER_BYTE / 2); +} + +/* + * The chip base addresses in cspec and cpspec have to be set + * after possible init_chip_wc_pat(), rather than in + * get_6120_chip_params(), so split out as separate function + */ +static void set_6120_baseaddrs(struct qib_devdata *dd) +{ + u32 cregbase; + cregbase = qib_read_kreg32(dd, kr_counterregbase); + dd->cspec->cregbase = (u64 __iomem *) + ((char __iomem *) dd->kregbase + cregbase); + + dd->egrtidbase = (u64 __iomem *) + ((char __iomem *) dd->kregbase + dd->rcvegrbase); +} + +/* + * Write the final few registers that depend on some of the + * init setup. Done late in init, just before bringing up + * the serdes. + */ +static int qib_late_6120_initreg(struct qib_devdata *dd) +{ + int ret = 0; + u64 val; + + qib_write_kreg(dd, kr_rcvhdrentsize, dd->rcvhdrentsize); + qib_write_kreg(dd, kr_rcvhdrsize, dd->rcvhdrsize); + qib_write_kreg(dd, kr_rcvhdrcnt, dd->rcvhdrcnt); + qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); + val = qib_read_kreg64(dd, kr_sendpioavailaddr); + if (val != dd->pioavailregs_phys) { + qib_dev_err(dd, "Catastrophic software error, " + "SendPIOAvailAddr written as %lx, " + "read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); + ret = -EINVAL; + } + return ret; +} + +static int init_6120_variables(struct qib_devdata *dd) +{ + int ret = 0; + struct qib_pportdata *ppd; + u32 sbufs; + + ppd = (struct qib_pportdata *)(dd + 1); + dd->pport = ppd; + dd->num_pports = 1; + + dd->cspec = (struct qib_chip_specific *)(ppd + dd->num_pports); + ppd->cpspec = NULL; /* not used in this chip */ + + spin_lock_init(&dd->cspec->kernel_tid_lock); + spin_lock_init(&dd->cspec->user_tid_lock); + spin_lock_init(&dd->cspec->rcvmod_lock); + spin_lock_init(&dd->cspec->gpio_lock); + + /* we haven't yet set QIB_PRESENT, so use read directly */ + dd->revision = readq(&dd->kregbase[kr_revision]); + + if ((dd->revision & 0xffffffffU) == 0xffffffffU) { + qib_dev_err(dd, "Revision register read failure, " + "giving up initialization\n"); + ret = -ENODEV; + goto bail; + } + dd->flags |= QIB_PRESENT; /* now register routines work */ + + dd->majrev = (u8) SYM_FIELD(dd->revision, Revision_R, + ChipRevMajor); + dd->minrev = (u8) SYM_FIELD(dd->revision, Revision_R, + ChipRevMinor); + + get_6120_chip_params(dd); + pe_boardname(dd); /* fill in boardname */ + + /* + * GPIO bits for TWSI data and clock, + * used for serial EEPROM. + */ + dd->gpio_sda_num = _QIB_GPIO_SDA_NUM; + dd->gpio_scl_num = _QIB_GPIO_SCL_NUM; + dd->twsi_eeprom_dev = QIB_TWSI_NO_DEV; + + if (qib_unordered_wc()) + dd->flags |= QIB_PIO_FLUSH_WC; + + /* + * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. + * 2 is Some Misc, 3 is reserved for future. + */ + dd->eep_st_masks[0].hwerrs_to_log = HWE_MASK(TXEMemParityErr); + + /* Ignore errors in PIO/PBC on systems with unordered write-combining */ + if (qib_unordered_wc()) + dd->eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY; + + dd->eep_st_masks[1].hwerrs_to_log = HWE_MASK(RXEMemParityErr); + + dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated); + + qib_init_pportdata(ppd, dd, 0, 1); + ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; + ppd->link_speed_supported = QIB_IB_SDR; + ppd->link_width_enabled = IB_WIDTH_4X; + ppd->link_speed_enabled = ppd->link_speed_supported; + /* these can't change for this chip, so set once */ + ppd->link_width_active = ppd->link_width_enabled; + ppd->link_speed_active = ppd->link_speed_enabled; + ppd->vls_supported = IB_VL_VL0; + ppd->vls_operational = ppd->vls_supported; + + dd->rcvhdrentsize = QIB_RCVHDR_ENTSIZE; + dd->rcvhdrsize = QIB_DFLT_RCVHDRSIZE; + dd->rhf_offset = 0; + + /* we always allocate at least 2048 bytes for eager buffers */ + ret = ib_mtu_enum_to_int(qib_ibmtu); + dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; + BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); + dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); + + qib_6120_tidtemplate(dd); + + /* + * We can request a receive interrupt for 1 or + * more packets from current offset. For now, we set this + * up for a single packet. + */ + dd->rhdrhead_intr_off = 1ULL << 32; + + /* setup the stats timer; the add_timer is done at end of init */ + init_timer(&dd->stats_timer); + dd->stats_timer.function = qib_get_6120_faststats; + dd->stats_timer.data = (unsigned long) dd; + + init_timer(&dd->cspec->pma_timer); + dd->cspec->pma_timer.function = pma_6120_timer; + dd->cspec->pma_timer.data = (unsigned long) ppd; + + dd->ureg_align = qib_read_kreg32(dd, kr_palign); + + dd->piosize2kmax_dwords = dd->piosize2k >> 2; + qib_6120_config_ctxts(dd); + qib_set_ctxtcnt(dd); + + if (qib_wc_pat) { + ret = init_chip_wc_pat(dd, 0); + if (ret) + goto bail; + } + set_6120_baseaddrs(dd); /* set chip access pointers now */ + + ret = 0; + if (qib_mini_init) + goto bail; + + qib_num_cfg_vls = 1; /* if any 6120's, only one VL */ + + ret = qib_create_ctxts(dd); + init_6120_cntrnames(dd); + + /* use all of 4KB buffers for the kernel, otherwise 16 */ + sbufs = dd->piobcnt4k ? dd->piobcnt4k : 16; + + dd->lastctxt_piobuf = dd->piobcnt2k + dd->piobcnt4k - sbufs; + dd->pbufsctxt = dd->lastctxt_piobuf / + (dd->cfgctxts - dd->first_user_ctxt); + + if (ret) + goto bail; +bail: + return ret; +} + +/* + * For this chip, we want to use the same buffer every time + * when we are trying to bring the link up (they are always VL15 + * packets). At that link state the packet should always go out immediately + * (or at least be discarded at the tx interface if the link is down). + * If it doesn't, and the buffer isn't available, that means some other + * sender has gotten ahead of us, and is preventing our packet from going + * out. In that case, we flush all packets, and try again. If that still + * fails, we fail the request, and hope things work the next time around. + * + * We don't need very complicated heuristics on whether the packet had + * time to go out or not, since even at SDR 1X, it goes out in very short + * time periods, covered by the chip reads done here and as part of the + * flush. + */ +static u32 __iomem *get_6120_link_buf(struct qib_pportdata *ppd, u32 *bnum) +{ + u32 __iomem *buf; + u32 lbuf = ppd->dd->piobcnt2k + ppd->dd->piobcnt4k - 1; + + /* + * always blip to get avail list updated, since it's almost + * always needed, and is fairly cheap. + */ + sendctrl_6120_mod(ppd->dd->pport, QIB_SENDCTRL_AVAIL_BLIP); + qib_read_kreg64(ppd->dd, kr_scratch); /* extra chip flush */ + buf = qib_getsendbuf_range(ppd->dd, bnum, lbuf, lbuf); + if (buf) + goto done; + + sendctrl_6120_mod(ppd, QIB_SENDCTRL_DISARM_ALL | QIB_SENDCTRL_FLUSH | + QIB_SENDCTRL_AVAIL_BLIP); + ppd->dd->upd_pio_shadow = 1; /* update our idea of what's busy */ + qib_read_kreg64(ppd->dd, kr_scratch); /* extra chip flush */ + buf = qib_getsendbuf_range(ppd->dd, bnum, lbuf, lbuf); +done: + return buf; +} + +static u32 __iomem *qib_6120_getsendbuf(struct qib_pportdata *ppd, u64 pbc, + u32 *pbufnum) +{ + u32 first, last, plen = pbc & QIB_PBC_LENGTH_MASK; + struct qib_devdata *dd = ppd->dd; + u32 __iomem *buf; + + if (((pbc >> 32) & PBC_6120_VL15_SEND_CTRL) && + !(ppd->lflags & (QIBL_IB_AUTONEG_INPROG | QIBL_LINKACTIVE))) + buf = get_6120_link_buf(ppd, pbufnum); + else { + + if ((plen + 1) > dd->piosize2kmax_dwords) + first = dd->piobcnt2k; + else + first = 0; + /* try 4k if all 2k busy, so same last for both sizes */ + last = dd->piobcnt2k + dd->piobcnt4k - 1; + buf = qib_getsendbuf_range(dd, pbufnum, first, last); + } + return buf; +} + +static int init_sdma_6120_regs(struct qib_pportdata *ppd) +{ + return -ENODEV; +} + +static u16 qib_sdma_6120_gethead(struct qib_pportdata *ppd) +{ + return 0; +} + +static int qib_sdma_6120_busy(struct qib_pportdata *ppd) +{ + return 0; +} + +static void qib_sdma_update_6120_tail(struct qib_pportdata *ppd, u16 tail) +{ +} + +static void qib_6120_sdma_sendctrl(struct qib_pportdata *ppd, unsigned op) +{ +} + +static void qib_sdma_set_6120_desc_cnt(struct qib_pportdata *ppd, unsigned cnt) +{ +} + +/* + * the pbc doesn't need a VL15 indicator, but we need it for link_buf. + * The chip ignores the bit if set. + */ +static u32 qib_6120_setpbc_control(struct qib_pportdata *ppd, u32 plen, + u8 srate, u8 vl) +{ + return vl == 15 ? PBC_6120_VL15_SEND_CTRL : 0; +} + +static void qib_6120_initvl15_bufs(struct qib_devdata *dd) +{ +} + +static void qib_6120_init_ctxt(struct qib_ctxtdata *rcd) +{ + rcd->rcvegrcnt = rcd->dd->rcvhdrcnt; + rcd->rcvegr_tid_base = rcd->ctxt * rcd->rcvegrcnt; +} + +static void qib_6120_txchk_change(struct qib_devdata *dd, u32 start, + u32 len, u32 avail, struct qib_ctxtdata *rcd) +{ +} + +static void writescratch(struct qib_devdata *dd, u32 val) +{ + (void) qib_write_kreg(dd, kr_scratch, val); +} + +static int qib_6120_tempsense_rd(struct qib_devdata *dd, int regnum) +{ + return -ENXIO; +} + +/* Dummy function, as 6120 boards never disable EEPROM Write */ +static int qib_6120_eeprom_wen(struct qib_devdata *dd, int wen) +{ + return 1; +} + +/** + * qib_init_iba6120_funcs - set up the chip-specific function pointers + * @pdev: pci_dev of the qlogic_ib device + * @ent: pci_device_id matching this chip + * + * This is global, and is called directly at init to set up the + * chip-specific function pointers for later use. + * + * It also allocates/partially-inits the qib_devdata struct for + * this device. + */ +struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct qib_devdata *dd; + int ret; + + dd = qib_alloc_devdata(pdev, sizeof(struct qib_pportdata) + + sizeof(struct qib_chip_specific)); + if (IS_ERR(dd)) + goto bail; + + dd->f_bringup_serdes = qib_6120_bringup_serdes; + dd->f_cleanup = qib_6120_setup_cleanup; + dd->f_clear_tids = qib_6120_clear_tids; + dd->f_free_irq = qib_6120_free_irq; + dd->f_get_base_info = qib_6120_get_base_info; + dd->f_get_msgheader = qib_6120_get_msgheader; + dd->f_getsendbuf = qib_6120_getsendbuf; + dd->f_gpio_mod = gpio_6120_mod; + dd->f_eeprom_wen = qib_6120_eeprom_wen; + dd->f_hdrqempty = qib_6120_hdrqempty; + dd->f_ib_updown = qib_6120_ib_updown; + dd->f_init_ctxt = qib_6120_init_ctxt; + dd->f_initvl15_bufs = qib_6120_initvl15_bufs; + dd->f_intr_fallback = qib_6120_nointr_fallback; + dd->f_late_initreg = qib_late_6120_initreg; + dd->f_setpbc_control = qib_6120_setpbc_control; + dd->f_portcntr = qib_portcntr_6120; + dd->f_put_tid = (dd->minrev >= 2) ? + qib_6120_put_tid_2 : + qib_6120_put_tid; + dd->f_quiet_serdes = qib_6120_quiet_serdes; + dd->f_rcvctrl = rcvctrl_6120_mod; + dd->f_read_cntrs = qib_read_6120cntrs; + dd->f_read_portcntrs = qib_read_6120portcntrs; + dd->f_reset = qib_6120_setup_reset; + dd->f_init_sdma_regs = init_sdma_6120_regs; + dd->f_sdma_busy = qib_sdma_6120_busy; + dd->f_sdma_gethead = qib_sdma_6120_gethead; + dd->f_sdma_sendctrl = qib_6120_sdma_sendctrl; + dd->f_sdma_set_desc_cnt = qib_sdma_set_6120_desc_cnt; + dd->f_sdma_update_tail = qib_sdma_update_6120_tail; + dd->f_sendctrl = sendctrl_6120_mod; + dd->f_set_armlaunch = qib_set_6120_armlaunch; + dd->f_set_cntr_sample = qib_set_cntr_6120_sample; + dd->f_iblink_state = qib_6120_iblink_state; + dd->f_ibphys_portstate = qib_6120_phys_portstate; + dd->f_get_ib_cfg = qib_6120_get_ib_cfg; + dd->f_set_ib_cfg = qib_6120_set_ib_cfg; + dd->f_set_ib_loopback = qib_6120_set_loopback; + dd->f_set_intr_state = qib_6120_set_intr_state; + dd->f_setextled = qib_6120_setup_setextled; + dd->f_txchk_change = qib_6120_txchk_change; + dd->f_update_usrhead = qib_update_6120_usrhead; + dd->f_wantpiobuf_intr = qib_wantpiobuf_6120_intr; + dd->f_xgxs_reset = qib_6120_xgxs_reset; + dd->f_writescratch = writescratch; + dd->f_tempsense_rd = qib_6120_tempsense_rd; + /* + * Do remaining pcie setup and save pcie values in dd. + * Any error printing is already done by the init code. + * On return, we have the chip mapped and accessible, + * but chip registers are not set up until start of + * init_6120_variables. + */ + ret = qib_pcie_ddinit(dd, pdev, ent); + if (ret < 0) + goto bail_free; + + /* initialize chip-specific variables */ + ret = init_6120_variables(dd); + if (ret) + goto bail_cleanup; + + if (qib_mini_init) + goto bail; + + if (qib_pcie_params(dd, 8, NULL, NULL)) + qib_dev_err(dd, "Failed to setup PCIe or interrupts; " + "continuing anyway\n"); + dd->cspec->irq = pdev->irq; /* save IRQ */ + + /* clear diagctrl register, in case diags were running and crashed */ + qib_write_kreg(dd, kr_hwdiagctrl, 0); + + if (qib_read_kreg64(dd, kr_hwerrstatus) & + QLOGIC_IB_HWE_SERDESPLLFAILED) + qib_write_kreg(dd, kr_hwerrclear, + QLOGIC_IB_HWE_SERDESPLLFAILED); + + /* setup interrupt handler (interrupt type handled above) */ + qib_setup_6120_interrupt(dd); + /* Note that qpn_mask is set by qib_6120_config_ctxts() first */ + qib_6120_init_hwerrors(dd); + + goto bail; + +bail_cleanup: + qib_pcie_ddcleanup(dd); +bail_free: + qib_free_devdata(dd); + dd = ERR_PTR(ret); +bail: + return dd; +} diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c new file mode 100644 index 00000000..3c722f79 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -0,0 +1,4642 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +/* + * This file contains all of the code that is specific to the + * QLogic_IB 7220 chip (except that specific to the SerDes) + */ + +#include +#include +#include +#include +#include +#include + +#include "qib.h" +#include "qib_7220.h" + +static void qib_setup_7220_setextled(struct qib_pportdata *, u32); +static void qib_7220_handle_hwerrors(struct qib_devdata *, char *, size_t); +static void sendctrl_7220_mod(struct qib_pportdata *ppd, u32 op); +static u32 qib_7220_iblink_state(u64); +static u8 qib_7220_phys_portstate(u64); +static void qib_sdma_update_7220_tail(struct qib_pportdata *, u16); +static void qib_set_ib_7220_lstate(struct qib_pportdata *, u16, u16); + +/* + * This file contains almost all the chip-specific register information and + * access functions for the QLogic QLogic_IB 7220 PCI-Express chip, with the + * exception of SerDes support, which in in qib_sd7220.c. + */ + +/* Below uses machine-generated qib_chipnum_regs.h file */ +#define KREG_IDX(regname) (QIB_7220_##regname##_OFFS / sizeof(u64)) + +/* Use defines to tie machine-generated names to lower-case names */ +#define kr_control KREG_IDX(Control) +#define kr_counterregbase KREG_IDX(CntrRegBase) +#define kr_errclear KREG_IDX(ErrClear) +#define kr_errmask KREG_IDX(ErrMask) +#define kr_errstatus KREG_IDX(ErrStatus) +#define kr_extctrl KREG_IDX(EXTCtrl) +#define kr_extstatus KREG_IDX(EXTStatus) +#define kr_gpio_clear KREG_IDX(GPIOClear) +#define kr_gpio_mask KREG_IDX(GPIOMask) +#define kr_gpio_out KREG_IDX(GPIOOut) +#define kr_gpio_status KREG_IDX(GPIOStatus) +#define kr_hrtbt_guid KREG_IDX(HRTBT_GUID) +#define kr_hwdiagctrl KREG_IDX(HwDiagCtrl) +#define kr_hwerrclear KREG_IDX(HwErrClear) +#define kr_hwerrmask KREG_IDX(HwErrMask) +#define kr_hwerrstatus KREG_IDX(HwErrStatus) +#define kr_ibcctrl KREG_IDX(IBCCtrl) +#define kr_ibcddrctrl KREG_IDX(IBCDDRCtrl) +#define kr_ibcddrstatus KREG_IDX(IBCDDRStatus) +#define kr_ibcstatus KREG_IDX(IBCStatus) +#define kr_ibserdesctrl KREG_IDX(IBSerDesCtrl) +#define kr_intclear KREG_IDX(IntClear) +#define kr_intmask KREG_IDX(IntMask) +#define kr_intstatus KREG_IDX(IntStatus) +#define kr_ncmodectrl KREG_IDX(IBNCModeCtrl) +#define kr_palign KREG_IDX(PageAlign) +#define kr_partitionkey KREG_IDX(RcvPartitionKey) +#define kr_portcnt KREG_IDX(PortCnt) +#define kr_rcvbthqp KREG_IDX(RcvBTHQP) +#define kr_rcvctrl KREG_IDX(RcvCtrl) +#define kr_rcvegrbase KREG_IDX(RcvEgrBase) +#define kr_rcvegrcnt KREG_IDX(RcvEgrCnt) +#define kr_rcvhdrcnt KREG_IDX(RcvHdrCnt) +#define kr_rcvhdrentsize KREG_IDX(RcvHdrEntSize) +#define kr_rcvhdrsize KREG_IDX(RcvHdrSize) +#define kr_rcvpktledcnt KREG_IDX(RcvPktLEDCnt) +#define kr_rcvtidbase KREG_IDX(RcvTIDBase) +#define kr_rcvtidcnt KREG_IDX(RcvTIDCnt) +#define kr_revision KREG_IDX(Revision) +#define kr_scratch KREG_IDX(Scratch) +#define kr_sendbuffererror KREG_IDX(SendBufErr0) +#define kr_sendctrl KREG_IDX(SendCtrl) +#define kr_senddmabase KREG_IDX(SendDmaBase) +#define kr_senddmabufmask0 KREG_IDX(SendDmaBufMask0) +#define kr_senddmabufmask1 (KREG_IDX(SendDmaBufMask0) + 1) +#define kr_senddmabufmask2 (KREG_IDX(SendDmaBufMask0) + 2) +#define kr_senddmahead KREG_IDX(SendDmaHead) +#define kr_senddmaheadaddr KREG_IDX(SendDmaHeadAddr) +#define kr_senddmalengen KREG_IDX(SendDmaLenGen) +#define kr_senddmastatus KREG_IDX(SendDmaStatus) +#define kr_senddmatail KREG_IDX(SendDmaTail) +#define kr_sendpioavailaddr KREG_IDX(SendBufAvailAddr) +#define kr_sendpiobufbase KREG_IDX(SendBufBase) +#define kr_sendpiobufcnt KREG_IDX(SendBufCnt) +#define kr_sendpiosize KREG_IDX(SendBufSize) +#define kr_sendregbase KREG_IDX(SendRegBase) +#define kr_userregbase KREG_IDX(UserRegBase) +#define kr_xgxs_cfg KREG_IDX(XGXSCfg) + +/* These must only be written via qib_write_kreg_ctxt() */ +#define kr_rcvhdraddr KREG_IDX(RcvHdrAddr0) +#define kr_rcvhdrtailaddr KREG_IDX(RcvHdrTailAddr0) + + +#define CREG_IDX(regname) ((QIB_7220_##regname##_OFFS - \ + QIB_7220_LBIntCnt_OFFS) / sizeof(u64)) + +#define cr_badformat CREG_IDX(RxVersionErrCnt) +#define cr_erricrc CREG_IDX(RxICRCErrCnt) +#define cr_errlink CREG_IDX(RxLinkMalformCnt) +#define cr_errlpcrc CREG_IDX(RxLPCRCErrCnt) +#define cr_errpkey CREG_IDX(RxPKeyMismatchCnt) +#define cr_rcvflowctrl_err CREG_IDX(RxFlowCtrlViolCnt) +#define cr_err_rlen CREG_IDX(RxLenErrCnt) +#define cr_errslen CREG_IDX(TxLenErrCnt) +#define cr_errtidfull CREG_IDX(RxTIDFullErrCnt) +#define cr_errtidvalid CREG_IDX(RxTIDValidErrCnt) +#define cr_errvcrc CREG_IDX(RxVCRCErrCnt) +#define cr_ibstatuschange CREG_IDX(IBStatusChangeCnt) +#define cr_lbint CREG_IDX(LBIntCnt) +#define cr_invalidrlen CREG_IDX(RxMaxMinLenErrCnt) +#define cr_invalidslen CREG_IDX(TxMaxMinLenErrCnt) +#define cr_lbflowstall CREG_IDX(LBFlowStallCnt) +#define cr_pktrcv CREG_IDX(RxDataPktCnt) +#define cr_pktrcvflowctrl CREG_IDX(RxFlowPktCnt) +#define cr_pktsend CREG_IDX(TxDataPktCnt) +#define cr_pktsendflow CREG_IDX(TxFlowPktCnt) +#define cr_portovfl CREG_IDX(RxP0HdrEgrOvflCnt) +#define cr_rcvebp CREG_IDX(RxEBPCnt) +#define cr_rcvovfl CREG_IDX(RxBufOvflCnt) +#define cr_senddropped CREG_IDX(TxDroppedPktCnt) +#define cr_sendstall CREG_IDX(TxFlowStallCnt) +#define cr_sendunderrun CREG_IDX(TxUnderrunCnt) +#define cr_wordrcv CREG_IDX(RxDwordCnt) +#define cr_wordsend CREG_IDX(TxDwordCnt) +#define cr_txunsupvl CREG_IDX(TxUnsupVLErrCnt) +#define cr_rxdroppkt CREG_IDX(RxDroppedPktCnt) +#define cr_iblinkerrrecov CREG_IDX(IBLinkErrRecoveryCnt) +#define cr_iblinkdown CREG_IDX(IBLinkDownedCnt) +#define cr_ibsymbolerr CREG_IDX(IBSymbolErrCnt) +#define cr_vl15droppedpkt CREG_IDX(RxVL15DroppedPktCnt) +#define cr_rxotherlocalphyerr CREG_IDX(RxOtherLocalPhyErrCnt) +#define cr_excessbufferovfl CREG_IDX(ExcessBufferOvflCnt) +#define cr_locallinkintegrityerr CREG_IDX(LocalLinkIntegrityErrCnt) +#define cr_rxvlerr CREG_IDX(RxVlErrCnt) +#define cr_rxdlidfltr CREG_IDX(RxDlidFltrCnt) +#define cr_psstat CREG_IDX(PSStat) +#define cr_psstart CREG_IDX(PSStart) +#define cr_psinterval CREG_IDX(PSInterval) +#define cr_psrcvdatacount CREG_IDX(PSRcvDataCount) +#define cr_psrcvpktscount CREG_IDX(PSRcvPktsCount) +#define cr_psxmitdatacount CREG_IDX(PSXmitDataCount) +#define cr_psxmitpktscount CREG_IDX(PSXmitPktsCount) +#define cr_psxmitwaitcount CREG_IDX(PSXmitWaitCount) +#define cr_txsdmadesc CREG_IDX(TxSDmaDescCnt) +#define cr_pcieretrydiag CREG_IDX(PcieRetryBufDiagQwordCnt) + +#define SYM_RMASK(regname, fldname) ((u64) \ + QIB_7220_##regname##_##fldname##_RMASK) +#define SYM_MASK(regname, fldname) ((u64) \ + QIB_7220_##regname##_##fldname##_RMASK << \ + QIB_7220_##regname##_##fldname##_LSB) +#define SYM_LSB(regname, fldname) (QIB_7220_##regname##_##fldname##_LSB) +#define SYM_FIELD(value, regname, fldname) ((u64) \ + (((value) >> SYM_LSB(regname, fldname)) & \ + SYM_RMASK(regname, fldname))) +#define ERR_MASK(fldname) SYM_MASK(ErrMask, fldname##Mask) +#define HWE_MASK(fldname) SYM_MASK(HwErrMask, fldname##Mask) + +/* ibcctrl bits */ +#define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1 +/* cycle through TS1/TS2 till OK */ +#define QLOGIC_IB_IBCC_LINKINITCMD_POLL 2 +/* wait for TS1, then go on */ +#define QLOGIC_IB_IBCC_LINKINITCMD_SLEEP 3 +#define QLOGIC_IB_IBCC_LINKINITCMD_SHIFT 16 + +#define QLOGIC_IB_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */ +#define QLOGIC_IB_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */ +#define QLOGIC_IB_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */ + +#define BLOB_7220_IBCHG 0x81 + +/* + * We could have a single register get/put routine, that takes a group type, + * but this is somewhat clearer and cleaner. It also gives us some error + * checking. 64 bit register reads should always work, but are inefficient + * on opteron (the northbridge always generates 2 separate HT 32 bit reads), + * so we use kreg32 wherever possible. User register and counter register + * reads are always 32 bit reads, so only one form of those routines. + */ + +/** + * qib_read_ureg32 - read 32-bit virtualized per-context register + * @dd: device + * @regno: register number + * @ctxt: context number + * + * Return the contents of a register that is virtualized to be per context. + * Returns -1 on errors (not distinguishable from valid contents at + * runtime; we may add a separate error variable at some point). + */ +static inline u32 qib_read_ureg32(const struct qib_devdata *dd, + enum qib_ureg regno, int ctxt) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return 0; + + if (dd->userbase) + return readl(regno + (u64 __iomem *) + ((char __iomem *)dd->userbase + + dd->ureg_align * ctxt)); + else + return readl(regno + (u64 __iomem *) + (dd->uregbase + + (char __iomem *)dd->kregbase + + dd->ureg_align * ctxt)); +} + +/** + * qib_write_ureg - write 32-bit virtualized per-context register + * @dd: device + * @regno: register number + * @value: value + * @ctxt: context + * + * Write the contents of a register that is virtualized to be per context. + */ +static inline void qib_write_ureg(const struct qib_devdata *dd, + enum qib_ureg regno, u64 value, int ctxt) +{ + u64 __iomem *ubase; + + if (dd->userbase) + ubase = (u64 __iomem *) + ((char __iomem *) dd->userbase + + dd->ureg_align * ctxt); + else + ubase = (u64 __iomem *) + (dd->uregbase + + (char __iomem *) dd->kregbase + + dd->ureg_align * ctxt); + + if (dd->kregbase && (dd->flags & QIB_PRESENT)) + writeq(value, &ubase[regno]); +} + +/** + * qib_write_kreg_ctxt - write a device's per-ctxt 64-bit kernel register + * @dd: the qlogic_ib device + * @regno: the register number to write + * @ctxt: the context containing the register + * @value: the value to write + */ +static inline void qib_write_kreg_ctxt(const struct qib_devdata *dd, + const u16 regno, unsigned ctxt, + u64 value) +{ + qib_write_kreg(dd, regno + ctxt, value); +} + +static inline void write_7220_creg(const struct qib_devdata *dd, + u16 regno, u64 value) +{ + if (dd->cspec->cregbase && (dd->flags & QIB_PRESENT)) + writeq(value, &dd->cspec->cregbase[regno]); +} + +static inline u64 read_7220_creg(const struct qib_devdata *dd, u16 regno) +{ + if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readq(&dd->cspec->cregbase[regno]); +} + +static inline u32 read_7220_creg32(const struct qib_devdata *dd, u16 regno) +{ + if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readl(&dd->cspec->cregbase[regno]); +} + +/* kr_revision bits */ +#define QLOGIC_IB_R_EMULATORREV_MASK ((1ULL << 22) - 1) +#define QLOGIC_IB_R_EMULATORREV_SHIFT 40 + +/* kr_control bits */ +#define QLOGIC_IB_C_RESET (1U << 7) + +/* kr_intstatus, kr_intclear, kr_intmask bits */ +#define QLOGIC_IB_I_RCVURG_MASK ((1ULL << 17) - 1) +#define QLOGIC_IB_I_RCVURG_SHIFT 32 +#define QLOGIC_IB_I_RCVAVAIL_MASK ((1ULL << 17) - 1) +#define QLOGIC_IB_I_RCVAVAIL_SHIFT 0 +#define QLOGIC_IB_I_SERDESTRIMDONE (1ULL << 27) + +#define QLOGIC_IB_C_FREEZEMODE 0x00000002 +#define QLOGIC_IB_C_LINKENABLE 0x00000004 + +#define QLOGIC_IB_I_SDMAINT 0x8000000000000000ULL +#define QLOGIC_IB_I_SDMADISABLED 0x4000000000000000ULL +#define QLOGIC_IB_I_ERROR 0x0000000080000000ULL +#define QLOGIC_IB_I_SPIOSENT 0x0000000040000000ULL +#define QLOGIC_IB_I_SPIOBUFAVAIL 0x0000000020000000ULL +#define QLOGIC_IB_I_GPIO 0x0000000010000000ULL + +/* variables for sanity checking interrupt and errors */ +#define QLOGIC_IB_I_BITSEXTANT \ + (QLOGIC_IB_I_SDMAINT | QLOGIC_IB_I_SDMADISABLED | \ + (QLOGIC_IB_I_RCVURG_MASK << QLOGIC_IB_I_RCVURG_SHIFT) | \ + (QLOGIC_IB_I_RCVAVAIL_MASK << \ + QLOGIC_IB_I_RCVAVAIL_SHIFT) | \ + QLOGIC_IB_I_ERROR | QLOGIC_IB_I_SPIOSENT | \ + QLOGIC_IB_I_SPIOBUFAVAIL | QLOGIC_IB_I_GPIO | \ + QLOGIC_IB_I_SERDESTRIMDONE) + +#define IB_HWE_BITSEXTANT \ + (HWE_MASK(RXEMemParityErr) | \ + HWE_MASK(TXEMemParityErr) | \ + (QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK << \ + QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT) | \ + QLOGIC_IB_HWE_PCIE1PLLFAILED | \ + QLOGIC_IB_HWE_PCIE0PLLFAILED | \ + QLOGIC_IB_HWE_PCIEPOISONEDTLP | \ + QLOGIC_IB_HWE_PCIECPLTIMEOUT | \ + QLOGIC_IB_HWE_PCIEBUSPARITYXTLH | \ + QLOGIC_IB_HWE_PCIEBUSPARITYXADM | \ + QLOGIC_IB_HWE_PCIEBUSPARITYRADM | \ + HWE_MASK(PowerOnBISTFailed) | \ + QLOGIC_IB_HWE_COREPLL_FBSLIP | \ + QLOGIC_IB_HWE_COREPLL_RFSLIP | \ + QLOGIC_IB_HWE_SERDESPLLFAILED | \ + HWE_MASK(IBCBusToSPCParityErr) | \ + HWE_MASK(IBCBusFromSPCParityErr) | \ + QLOGIC_IB_HWE_PCIECPLDATAQUEUEERR | \ + QLOGIC_IB_HWE_PCIECPLHDRQUEUEERR | \ + QLOGIC_IB_HWE_SDMAMEMREADERR | \ + QLOGIC_IB_HWE_CLK_UC_PLLNOTLOCKED | \ + QLOGIC_IB_HWE_PCIESERDESQ0PCLKNOTDETECT | \ + QLOGIC_IB_HWE_PCIESERDESQ1PCLKNOTDETECT | \ + QLOGIC_IB_HWE_PCIESERDESQ2PCLKNOTDETECT | \ + QLOGIC_IB_HWE_PCIESERDESQ3PCLKNOTDETECT | \ + QLOGIC_IB_HWE_DDSRXEQMEMORYPARITYERR | \ + QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR | \ + QLOGIC_IB_HWE_PCIE_UC_OCT0MEMORYPARITYERR | \ + QLOGIC_IB_HWE_PCIE_UC_OCT1MEMORYPARITYERR) + +#define IB_E_BITSEXTANT \ + (ERR_MASK(RcvFormatErr) | ERR_MASK(RcvVCRCErr) | \ + ERR_MASK(RcvICRCErr) | ERR_MASK(RcvMinPktLenErr) | \ + ERR_MASK(RcvMaxPktLenErr) | ERR_MASK(RcvLongPktLenErr) | \ + ERR_MASK(RcvShortPktLenErr) | ERR_MASK(RcvUnexpectedCharErr) | \ + ERR_MASK(RcvUnsupportedVLErr) | ERR_MASK(RcvEBPErr) | \ + ERR_MASK(RcvIBFlowErr) | ERR_MASK(RcvBadVersionErr) | \ + ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr) | \ + ERR_MASK(RcvBadTidErr) | ERR_MASK(RcvHdrLenErr) | \ + ERR_MASK(RcvHdrErr) | ERR_MASK(RcvIBLostLinkErr) | \ + ERR_MASK(SendSpecialTriggerErr) | \ + ERR_MASK(SDmaDisabledErr) | ERR_MASK(SendMinPktLenErr) | \ + ERR_MASK(SendMaxPktLenErr) | ERR_MASK(SendUnderRunErr) | \ + ERR_MASK(SendPktLenErr) | ERR_MASK(SendDroppedSmpPktErr) | \ + ERR_MASK(SendDroppedDataPktErr) | \ + ERR_MASK(SendPioArmLaunchErr) | \ + ERR_MASK(SendUnexpectedPktNumErr) | \ + ERR_MASK(SendUnsupportedVLErr) | ERR_MASK(SendBufMisuseErr) | \ + ERR_MASK(SDmaGenMismatchErr) | ERR_MASK(SDmaOutOfBoundErr) | \ + ERR_MASK(SDmaTailOutOfBoundErr) | ERR_MASK(SDmaBaseErr) | \ + ERR_MASK(SDma1stDescErr) | ERR_MASK(SDmaRpyTagErr) | \ + ERR_MASK(SDmaDwEnErr) | ERR_MASK(SDmaMissingDwErr) | \ + ERR_MASK(SDmaUnexpDataErr) | \ + ERR_MASK(IBStatusChanged) | ERR_MASK(InvalidAddrErr) | \ + ERR_MASK(ResetNegated) | ERR_MASK(HardwareErr) | \ + ERR_MASK(SDmaDescAddrMisalignErr) | \ + ERR_MASK(InvalidEEPCmd)) + +/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ +#define QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK 0x00000000000000ffULL +#define QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT 0 +#define QLOGIC_IB_HWE_PCIEPOISONEDTLP 0x0000000010000000ULL +#define QLOGIC_IB_HWE_PCIECPLTIMEOUT 0x0000000020000000ULL +#define QLOGIC_IB_HWE_PCIEBUSPARITYXTLH 0x0000000040000000ULL +#define QLOGIC_IB_HWE_PCIEBUSPARITYXADM 0x0000000080000000ULL +#define QLOGIC_IB_HWE_PCIEBUSPARITYRADM 0x0000000100000000ULL +#define QLOGIC_IB_HWE_COREPLL_FBSLIP 0x0080000000000000ULL +#define QLOGIC_IB_HWE_COREPLL_RFSLIP 0x0100000000000000ULL +#define QLOGIC_IB_HWE_PCIE1PLLFAILED 0x0400000000000000ULL +#define QLOGIC_IB_HWE_PCIE0PLLFAILED 0x0800000000000000ULL +#define QLOGIC_IB_HWE_SERDESPLLFAILED 0x1000000000000000ULL +/* specific to this chip */ +#define QLOGIC_IB_HWE_PCIECPLDATAQUEUEERR 0x0000000000000040ULL +#define QLOGIC_IB_HWE_PCIECPLHDRQUEUEERR 0x0000000000000080ULL +#define QLOGIC_IB_HWE_SDMAMEMREADERR 0x0000000010000000ULL +#define QLOGIC_IB_HWE_CLK_UC_PLLNOTLOCKED 0x2000000000000000ULL +#define QLOGIC_IB_HWE_PCIESERDESQ0PCLKNOTDETECT 0x0100000000000000ULL +#define QLOGIC_IB_HWE_PCIESERDESQ1PCLKNOTDETECT 0x0200000000000000ULL +#define QLOGIC_IB_HWE_PCIESERDESQ2PCLKNOTDETECT 0x0400000000000000ULL +#define QLOGIC_IB_HWE_PCIESERDESQ3PCLKNOTDETECT 0x0800000000000000ULL +#define QLOGIC_IB_HWE_DDSRXEQMEMORYPARITYERR 0x0000008000000000ULL +#define QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR 0x0000004000000000ULL +#define QLOGIC_IB_HWE_PCIE_UC_OCT0MEMORYPARITYERR 0x0000001000000000ULL +#define QLOGIC_IB_HWE_PCIE_UC_OCT1MEMORYPARITYERR 0x0000002000000000ULL + +#define IBA7220_IBCC_LINKCMD_SHIFT 19 + +/* kr_ibcddrctrl bits */ +#define IBA7220_IBC_DLIDLMC_MASK 0xFFFFFFFFUL +#define IBA7220_IBC_DLIDLMC_SHIFT 32 + +#define IBA7220_IBC_HRTBT_MASK (SYM_RMASK(IBCDDRCtrl, HRTBT_AUTO) | \ + SYM_RMASK(IBCDDRCtrl, HRTBT_ENB)) +#define IBA7220_IBC_HRTBT_SHIFT SYM_LSB(IBCDDRCtrl, HRTBT_ENB) + +#define IBA7220_IBC_LANE_REV_SUPPORTED (1<<8) +#define IBA7220_IBC_LREV_MASK 1 +#define IBA7220_IBC_LREV_SHIFT 8 +#define IBA7220_IBC_RXPOL_MASK 1 +#define IBA7220_IBC_RXPOL_SHIFT 7 +#define IBA7220_IBC_WIDTH_SHIFT 5 +#define IBA7220_IBC_WIDTH_MASK 0x3 +#define IBA7220_IBC_WIDTH_1X_ONLY (0 << IBA7220_IBC_WIDTH_SHIFT) +#define IBA7220_IBC_WIDTH_4X_ONLY (1 << IBA7220_IBC_WIDTH_SHIFT) +#define IBA7220_IBC_WIDTH_AUTONEG (2 << IBA7220_IBC_WIDTH_SHIFT) +#define IBA7220_IBC_SPEED_AUTONEG (1 << 1) +#define IBA7220_IBC_SPEED_SDR (1 << 2) +#define IBA7220_IBC_SPEED_DDR (1 << 3) +#define IBA7220_IBC_SPEED_AUTONEG_MASK (0x7 << 1) +#define IBA7220_IBC_IBTA_1_2_MASK (1) + +/* kr_ibcddrstatus */ +/* link latency shift is 0, don't bother defining */ +#define IBA7220_DDRSTAT_LINKLAT_MASK 0x3ffffff + +/* kr_extstatus bits */ +#define QLOGIC_IB_EXTS_FREQSEL 0x2 +#define QLOGIC_IB_EXTS_SERDESSEL 0x4 +#define QLOGIC_IB_EXTS_MEMBIST_ENDTEST 0x0000000000004000 +#define QLOGIC_IB_EXTS_MEMBIST_DISABLED 0x0000000000008000 + +/* kr_xgxsconfig bits */ +#define QLOGIC_IB_XGXS_RESET 0x5ULL +#define QLOGIC_IB_XGXS_FC_SAFE (1ULL << 63) + +/* kr_rcvpktledcnt */ +#define IBA7220_LEDBLINK_ON_SHIFT 32 /* 4ns period on after packet */ +#define IBA7220_LEDBLINK_OFF_SHIFT 0 /* 4ns period off before next on */ + +#define _QIB_GPIO_SDA_NUM 1 +#define _QIB_GPIO_SCL_NUM 0 +#define QIB_TWSI_EEPROM_DEV 0xA2 /* All Production 7220 cards. */ +#define QIB_TWSI_TEMP_DEV 0x98 + +/* HW counter clock is at 4nsec */ +#define QIB_7220_PSXMITWAIT_CHECK_RATE 4000 + +#define IBA7220_R_INTRAVAIL_SHIFT 17 +#define IBA7220_R_PKEY_DIS_SHIFT 34 +#define IBA7220_R_TAILUPD_SHIFT 35 +#define IBA7220_R_CTXTCFG_SHIFT 36 + +#define IBA7220_HDRHEAD_PKTINT_SHIFT 32 /* interrupt cnt in upper 32 bits */ + +/* + * the size bits give us 2^N, in KB units. 0 marks as invalid, + * and 7 is reserved. We currently use only 2KB and 4KB + */ +#define IBA7220_TID_SZ_SHIFT 37 /* shift to 3bit size selector */ +#define IBA7220_TID_SZ_2K (1UL << IBA7220_TID_SZ_SHIFT) /* 2KB */ +#define IBA7220_TID_SZ_4K (2UL << IBA7220_TID_SZ_SHIFT) /* 4KB */ +#define IBA7220_TID_PA_SHIFT 11U /* TID addr in chip stored w/o low bits */ +#define PBC_7220_VL15_SEND (1ULL << 63) /* pbc; VL15, no credit check */ +#define PBC_7220_VL15_SEND_CTRL (1ULL << 31) /* control version of same */ + +#define AUTONEG_TRIES 5 /* sequential retries to negotiate DDR */ + +/* packet rate matching delay multiplier */ +static u8 rate_to_delay[2][2] = { + /* 1x, 4x */ + { 8, 2 }, /* SDR */ + { 4, 1 } /* DDR */ +}; + +static u8 ib_rate_to_delay[IB_RATE_120_GBPS + 1] = { + [IB_RATE_2_5_GBPS] = 8, + [IB_RATE_5_GBPS] = 4, + [IB_RATE_10_GBPS] = 2, + [IB_RATE_20_GBPS] = 1 +}; + +#define IBA7220_LINKSPEED_SHIFT SYM_LSB(IBCStatus, LinkSpeedActive) +#define IBA7220_LINKWIDTH_SHIFT SYM_LSB(IBCStatus, LinkWidthActive) + +/* link training states, from IBC */ +#define IB_7220_LT_STATE_DISABLED 0x00 +#define IB_7220_LT_STATE_LINKUP 0x01 +#define IB_7220_LT_STATE_POLLACTIVE 0x02 +#define IB_7220_LT_STATE_POLLQUIET 0x03 +#define IB_7220_LT_STATE_SLEEPDELAY 0x04 +#define IB_7220_LT_STATE_SLEEPQUIET 0x05 +#define IB_7220_LT_STATE_CFGDEBOUNCE 0x08 +#define IB_7220_LT_STATE_CFGRCVFCFG 0x09 +#define IB_7220_LT_STATE_CFGWAITRMT 0x0a +#define IB_7220_LT_STATE_CFGIDLE 0x0b +#define IB_7220_LT_STATE_RECOVERRETRAIN 0x0c +#define IB_7220_LT_STATE_RECOVERWAITRMT 0x0e +#define IB_7220_LT_STATE_RECOVERIDLE 0x0f + +/* link state machine states from IBC */ +#define IB_7220_L_STATE_DOWN 0x0 +#define IB_7220_L_STATE_INIT 0x1 +#define IB_7220_L_STATE_ARM 0x2 +#define IB_7220_L_STATE_ACTIVE 0x3 +#define IB_7220_L_STATE_ACT_DEFER 0x4 + +static const u8 qib_7220_physportstate[0x20] = { + [IB_7220_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED, + [IB_7220_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP, + [IB_7220_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL, + [IB_7220_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL, + [IB_7220_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP, + [IB_7220_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP, + [IB_7220_LT_STATE_CFGDEBOUNCE] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_7220_LT_STATE_CFGRCVFCFG] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_7220_LT_STATE_CFGWAITRMT] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_7220_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_7220_LT_STATE_RECOVERRETRAIN] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [IB_7220_LT_STATE_RECOVERWAITRMT] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [IB_7220_LT_STATE_RECOVERIDLE] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN +}; + +int qib_special_trigger; +module_param_named(special_trigger, qib_special_trigger, int, S_IRUGO); +MODULE_PARM_DESC(special_trigger, "Enable SpecialTrigger arm/launch"); + +#define IBCBUSFRSPCPARITYERR HWE_MASK(IBCBusFromSPCParityErr) +#define IBCBUSTOSPCPARITYERR HWE_MASK(IBCBusToSPCParityErr) + +#define SYM_MASK_BIT(regname, fldname, bit) ((u64) \ + (1ULL << (SYM_LSB(regname, fldname) + (bit)))) + +#define TXEMEMPARITYERR_PIOBUF \ + SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 0) +#define TXEMEMPARITYERR_PIOPBC \ + SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 1) +#define TXEMEMPARITYERR_PIOLAUNCHFIFO \ + SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 2) + +#define RXEMEMPARITYERR_RCVBUF \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 0) +#define RXEMEMPARITYERR_LOOKUPQ \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 1) +#define RXEMEMPARITYERR_EXPTID \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 2) +#define RXEMEMPARITYERR_EAGERTID \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 3) +#define RXEMEMPARITYERR_FLAGBUF \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 4) +#define RXEMEMPARITYERR_DATAINFO \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 5) +#define RXEMEMPARITYERR_HDRINFO \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 6) + +/* 7220 specific hardware errors... */ +static const struct qib_hwerror_msgs qib_7220_hwerror_msgs[] = { + /* generic hardware errors */ + QLOGIC_IB_HWE_MSG(IBCBUSFRSPCPARITYERR, "QIB2IB Parity"), + QLOGIC_IB_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2QIB Parity"), + + QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOBUF, + "TXE PIOBUF Memory Parity"), + QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOPBC, + "TXE PIOPBC Memory Parity"), + QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOLAUNCHFIFO, + "TXE PIOLAUNCHFIFO Memory Parity"), + + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_RCVBUF, + "RXE RCVBUF Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_LOOKUPQ, + "RXE LOOKUPQ Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_EAGERTID, + "RXE EAGERTID Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_EXPTID, + "RXE EXPTID Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_FLAGBUF, + "RXE FLAGBUF Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_DATAINFO, + "RXE DATAINFO Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_HDRINFO, + "RXE HDRINFO Memory Parity"), + + /* chip-specific hardware errors */ + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEPOISONEDTLP, + "PCIe Poisoned TLP"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIECPLTIMEOUT, + "PCIe completion timeout"), + /* + * In practice, it's unlikely wthat we'll see PCIe PLL, or bus + * parity or memory parity error failures, because most likely we + * won't be able to talk to the core of the chip. Nonetheless, we + * might see them, if they are in parts of the PCIe core that aren't + * essential. + */ + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE1PLLFAILED, + "PCIePLL1"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE0PLLFAILED, + "PCIePLL0"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYXTLH, + "PCIe XTLH core parity"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYXADM, + "PCIe ADM TX core parity"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYRADM, + "PCIe ADM RX core parity"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_SERDESPLLFAILED, + "SerDes PLL"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIECPLDATAQUEUEERR, + "PCIe cpl header queue"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIECPLHDRQUEUEERR, + "PCIe cpl data queue"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_SDMAMEMREADERR, + "Send DMA memory read"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_CLK_UC_PLLNOTLOCKED, + "uC PLL clock not locked"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIESERDESQ0PCLKNOTDETECT, + "PCIe serdes Q0 no clock"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIESERDESQ1PCLKNOTDETECT, + "PCIe serdes Q1 no clock"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIESERDESQ2PCLKNOTDETECT, + "PCIe serdes Q2 no clock"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIESERDESQ3PCLKNOTDETECT, + "PCIe serdes Q3 no clock"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_DDSRXEQMEMORYPARITYERR, + "DDS RXEQ memory parity"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR, + "IB uC memory parity"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE_UC_OCT0MEMORYPARITYERR, + "PCIe uC oct0 memory parity"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE_UC_OCT1MEMORYPARITYERR, + "PCIe uC oct1 memory parity"), +}; + +#define RXE_PARITY (RXEMEMPARITYERR_EAGERTID|RXEMEMPARITYERR_EXPTID) + +#define QLOGIC_IB_E_PKTERRS (\ + ERR_MASK(SendPktLenErr) | \ + ERR_MASK(SendDroppedDataPktErr) | \ + ERR_MASK(RcvVCRCErr) | \ + ERR_MASK(RcvICRCErr) | \ + ERR_MASK(RcvShortPktLenErr) | \ + ERR_MASK(RcvEBPErr)) + +/* Convenience for decoding Send DMA errors */ +#define QLOGIC_IB_E_SDMAERRS ( \ + ERR_MASK(SDmaGenMismatchErr) | \ + ERR_MASK(SDmaOutOfBoundErr) | \ + ERR_MASK(SDmaTailOutOfBoundErr) | ERR_MASK(SDmaBaseErr) | \ + ERR_MASK(SDma1stDescErr) | ERR_MASK(SDmaRpyTagErr) | \ + ERR_MASK(SDmaDwEnErr) | ERR_MASK(SDmaMissingDwErr) | \ + ERR_MASK(SDmaUnexpDataErr) | \ + ERR_MASK(SDmaDescAddrMisalignErr) | \ + ERR_MASK(SDmaDisabledErr) | \ + ERR_MASK(SendBufMisuseErr)) + +/* These are all rcv-related errors which we want to count for stats */ +#define E_SUM_PKTERRS \ + (ERR_MASK(RcvHdrLenErr) | ERR_MASK(RcvBadTidErr) | \ + ERR_MASK(RcvBadVersionErr) | ERR_MASK(RcvHdrErr) | \ + ERR_MASK(RcvLongPktLenErr) | ERR_MASK(RcvShortPktLenErr) | \ + ERR_MASK(RcvMaxPktLenErr) | ERR_MASK(RcvMinPktLenErr) | \ + ERR_MASK(RcvFormatErr) | ERR_MASK(RcvUnsupportedVLErr) | \ + ERR_MASK(RcvUnexpectedCharErr) | ERR_MASK(RcvEBPErr)) + +/* These are all send-related errors which we want to count for stats */ +#define E_SUM_ERRS \ + (ERR_MASK(SendPioArmLaunchErr) | ERR_MASK(SendUnexpectedPktNumErr) | \ + ERR_MASK(SendDroppedDataPktErr) | ERR_MASK(SendDroppedSmpPktErr) | \ + ERR_MASK(SendMaxPktLenErr) | ERR_MASK(SendUnsupportedVLErr) | \ + ERR_MASK(SendMinPktLenErr) | ERR_MASK(SendPktLenErr) | \ + ERR_MASK(InvalidAddrErr)) + +/* + * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore + * errors not related to freeze and cancelling buffers. Can't ignore + * armlaunch because could get more while still cleaning up, and need + * to cancel those as they happen. + */ +#define E_SPKT_ERRS_IGNORE \ + (ERR_MASK(SendDroppedDataPktErr) | ERR_MASK(SendDroppedSmpPktErr) | \ + ERR_MASK(SendMaxPktLenErr) | ERR_MASK(SendMinPktLenErr) | \ + ERR_MASK(SendPktLenErr)) + +/* + * these are errors that can occur when the link changes state while + * a packet is being sent or received. This doesn't cover things + * like EBP or VCRC that can be the result of a sending having the + * link change state, so we receive a "known bad" packet. + */ +#define E_SUM_LINK_PKTERRS \ + (ERR_MASK(SendDroppedDataPktErr) | ERR_MASK(SendDroppedSmpPktErr) | \ + ERR_MASK(SendMinPktLenErr) | ERR_MASK(SendPktLenErr) | \ + ERR_MASK(RcvShortPktLenErr) | ERR_MASK(RcvMinPktLenErr) | \ + ERR_MASK(RcvUnexpectedCharErr)) + +static void autoneg_7220_work(struct work_struct *); +static u32 __iomem *qib_7220_getsendbuf(struct qib_pportdata *, u64, u32 *); + +/* + * Called when we might have an error that is specific to a particular + * PIO buffer, and may need to cancel that buffer, so it can be re-used. + * because we don't need to force the update of pioavail. + */ +static void qib_disarm_7220_senderrbufs(struct qib_pportdata *ppd) +{ + unsigned long sbuf[3]; + struct qib_devdata *dd = ppd->dd; + + /* + * It's possible that sendbuffererror could have bits set; might + * have already done this as a result of hardware error handling. + */ + /* read these before writing errorclear */ + sbuf[0] = qib_read_kreg64(dd, kr_sendbuffererror); + sbuf[1] = qib_read_kreg64(dd, kr_sendbuffererror + 1); + sbuf[2] = qib_read_kreg64(dd, kr_sendbuffererror + 2); + + if (sbuf[0] || sbuf[1] || sbuf[2]) + qib_disarm_piobufs_set(dd, sbuf, + dd->piobcnt2k + dd->piobcnt4k); +} + +static void qib_7220_txe_recover(struct qib_devdata *dd) +{ + qib_devinfo(dd->pcidev, "Recovering from TXE PIO parity error\n"); + qib_disarm_7220_senderrbufs(dd->pport); +} + +/* + * This is called with interrupts disabled and sdma_lock held. + */ +static void qib_7220_sdma_sendctrl(struct qib_pportdata *ppd, unsigned op) +{ + struct qib_devdata *dd = ppd->dd; + u64 set_sendctrl = 0; + u64 clr_sendctrl = 0; + + if (op & QIB_SDMA_SENDCTRL_OP_ENABLE) + set_sendctrl |= SYM_MASK(SendCtrl, SDmaEnable); + else + clr_sendctrl |= SYM_MASK(SendCtrl, SDmaEnable); + + if (op & QIB_SDMA_SENDCTRL_OP_INTENABLE) + set_sendctrl |= SYM_MASK(SendCtrl, SDmaIntEnable); + else + clr_sendctrl |= SYM_MASK(SendCtrl, SDmaIntEnable); + + if (op & QIB_SDMA_SENDCTRL_OP_HALT) + set_sendctrl |= SYM_MASK(SendCtrl, SDmaHalt); + else + clr_sendctrl |= SYM_MASK(SendCtrl, SDmaHalt); + + spin_lock(&dd->sendctrl_lock); + + dd->sendctrl |= set_sendctrl; + dd->sendctrl &= ~clr_sendctrl; + + qib_write_kreg(dd, kr_sendctrl, dd->sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + + spin_unlock(&dd->sendctrl_lock); +} + +static void qib_decode_7220_sdma_errs(struct qib_pportdata *ppd, + u64 err, char *buf, size_t blen) +{ + static const struct { + u64 err; + const char *msg; + } errs[] = { + { ERR_MASK(SDmaGenMismatchErr), + "SDmaGenMismatch" }, + { ERR_MASK(SDmaOutOfBoundErr), + "SDmaOutOfBound" }, + { ERR_MASK(SDmaTailOutOfBoundErr), + "SDmaTailOutOfBound" }, + { ERR_MASK(SDmaBaseErr), + "SDmaBase" }, + { ERR_MASK(SDma1stDescErr), + "SDma1stDesc" }, + { ERR_MASK(SDmaRpyTagErr), + "SDmaRpyTag" }, + { ERR_MASK(SDmaDwEnErr), + "SDmaDwEn" }, + { ERR_MASK(SDmaMissingDwErr), + "SDmaMissingDw" }, + { ERR_MASK(SDmaUnexpDataErr), + "SDmaUnexpData" }, + { ERR_MASK(SDmaDescAddrMisalignErr), + "SDmaDescAddrMisalign" }, + { ERR_MASK(SendBufMisuseErr), + "SendBufMisuse" }, + { ERR_MASK(SDmaDisabledErr), + "SDmaDisabled" }, + }; + int i; + size_t bidx = 0; + + for (i = 0; i < ARRAY_SIZE(errs); i++) { + if (err & errs[i].err) + bidx += scnprintf(buf + bidx, blen - bidx, + "%s ", errs[i].msg); + } +} + +/* + * This is called as part of link down clean up so disarm and flush + * all send buffers so that SMP packets can be sent. + */ +static void qib_7220_sdma_hw_clean_up(struct qib_pportdata *ppd) +{ + /* This will trigger the Abort interrupt */ + sendctrl_7220_mod(ppd, QIB_SENDCTRL_DISARM_ALL | QIB_SENDCTRL_FLUSH | + QIB_SENDCTRL_AVAIL_BLIP); + ppd->dd->upd_pio_shadow = 1; /* update our idea of what's busy */ +} + +static void qib_sdma_7220_setlengen(struct qib_pportdata *ppd) +{ + /* + * Set SendDmaLenGen and clear and set + * the MSB of the generation count to enable generation checking + * and load the internal generation counter. + */ + qib_write_kreg(ppd->dd, kr_senddmalengen, ppd->sdma_descq_cnt); + qib_write_kreg(ppd->dd, kr_senddmalengen, + ppd->sdma_descq_cnt | + (1ULL << QIB_7220_SendDmaLenGen_Generation_MSB)); +} + +static void qib_7220_sdma_hw_start_up(struct qib_pportdata *ppd) +{ + qib_sdma_7220_setlengen(ppd); + qib_sdma_update_7220_tail(ppd, 0); /* Set SendDmaTail */ + ppd->sdma_head_dma[0] = 0; +} + +#define DISABLES_SDMA ( \ + ERR_MASK(SDmaDisabledErr) | \ + ERR_MASK(SDmaBaseErr) | \ + ERR_MASK(SDmaTailOutOfBoundErr) | \ + ERR_MASK(SDmaOutOfBoundErr) | \ + ERR_MASK(SDma1stDescErr) | \ + ERR_MASK(SDmaRpyTagErr) | \ + ERR_MASK(SDmaGenMismatchErr) | \ + ERR_MASK(SDmaDescAddrMisalignErr) | \ + ERR_MASK(SDmaMissingDwErr) | \ + ERR_MASK(SDmaDwEnErr)) + +static void sdma_7220_errors(struct qib_pportdata *ppd, u64 errs) +{ + unsigned long flags; + struct qib_devdata *dd = ppd->dd; + char *msg; + + errs &= QLOGIC_IB_E_SDMAERRS; + + msg = dd->cspec->sdmamsgbuf; + qib_decode_7220_sdma_errs(ppd, errs, msg, sizeof dd->cspec->sdmamsgbuf); + spin_lock_irqsave(&ppd->sdma_lock, flags); + + if (errs & ERR_MASK(SendBufMisuseErr)) { + unsigned long sbuf[3]; + + sbuf[0] = qib_read_kreg64(dd, kr_sendbuffererror); + sbuf[1] = qib_read_kreg64(dd, kr_sendbuffererror + 1); + sbuf[2] = qib_read_kreg64(dd, kr_sendbuffererror + 2); + + qib_dev_err(ppd->dd, + "IB%u:%u SendBufMisuse: %04lx %016lx %016lx\n", + ppd->dd->unit, ppd->port, sbuf[2], sbuf[1], + sbuf[0]); + } + + if (errs & ERR_MASK(SDmaUnexpDataErr)) + qib_dev_err(dd, "IB%u:%u SDmaUnexpData\n", ppd->dd->unit, + ppd->port); + + switch (ppd->sdma_state.current_state) { + case qib_sdma_state_s00_hw_down: + /* not expecting any interrupts */ + break; + + case qib_sdma_state_s10_hw_start_up_wait: + /* handled in intr path */ + break; + + case qib_sdma_state_s20_idle: + /* not expecting any interrupts */ + break; + + case qib_sdma_state_s30_sw_clean_up_wait: + /* not expecting any interrupts */ + break; + + case qib_sdma_state_s40_hw_clean_up_wait: + if (errs & ERR_MASK(SDmaDisabledErr)) + __qib_sdma_process_event(ppd, + qib_sdma_event_e50_hw_cleaned); + break; + + case qib_sdma_state_s50_hw_halt_wait: + /* handled in intr path */ + break; + + case qib_sdma_state_s99_running: + if (errs & DISABLES_SDMA) + __qib_sdma_process_event(ppd, + qib_sdma_event_e7220_err_halted); + break; + } + + spin_unlock_irqrestore(&ppd->sdma_lock, flags); +} + +/* + * Decode the error status into strings, deciding whether to always + * print * it or not depending on "normal packet errors" vs everything + * else. Return 1 if "real" errors, otherwise 0 if only packet + * errors, so caller can decide what to print with the string. + */ +static int qib_decode_7220_err(struct qib_devdata *dd, char *buf, size_t blen, + u64 err) +{ + int iserr = 1; + + *buf = '\0'; + if (err & QLOGIC_IB_E_PKTERRS) { + if (!(err & ~QLOGIC_IB_E_PKTERRS)) + iserr = 0; + if ((err & ERR_MASK(RcvICRCErr)) && + !(err & (ERR_MASK(RcvVCRCErr) | ERR_MASK(RcvEBPErr)))) + strlcat(buf, "CRC ", blen); + if (!iserr) + goto done; + } + if (err & ERR_MASK(RcvHdrLenErr)) + strlcat(buf, "rhdrlen ", blen); + if (err & ERR_MASK(RcvBadTidErr)) + strlcat(buf, "rbadtid ", blen); + if (err & ERR_MASK(RcvBadVersionErr)) + strlcat(buf, "rbadversion ", blen); + if (err & ERR_MASK(RcvHdrErr)) + strlcat(buf, "rhdr ", blen); + if (err & ERR_MASK(SendSpecialTriggerErr)) + strlcat(buf, "sendspecialtrigger ", blen); + if (err & ERR_MASK(RcvLongPktLenErr)) + strlcat(buf, "rlongpktlen ", blen); + if (err & ERR_MASK(RcvMaxPktLenErr)) + strlcat(buf, "rmaxpktlen ", blen); + if (err & ERR_MASK(RcvMinPktLenErr)) + strlcat(buf, "rminpktlen ", blen); + if (err & ERR_MASK(SendMinPktLenErr)) + strlcat(buf, "sminpktlen ", blen); + if (err & ERR_MASK(RcvFormatErr)) + strlcat(buf, "rformaterr ", blen); + if (err & ERR_MASK(RcvUnsupportedVLErr)) + strlcat(buf, "runsupvl ", blen); + if (err & ERR_MASK(RcvUnexpectedCharErr)) + strlcat(buf, "runexpchar ", blen); + if (err & ERR_MASK(RcvIBFlowErr)) + strlcat(buf, "ribflow ", blen); + if (err & ERR_MASK(SendUnderRunErr)) + strlcat(buf, "sunderrun ", blen); + if (err & ERR_MASK(SendPioArmLaunchErr)) + strlcat(buf, "spioarmlaunch ", blen); + if (err & ERR_MASK(SendUnexpectedPktNumErr)) + strlcat(buf, "sunexperrpktnum ", blen); + if (err & ERR_MASK(SendDroppedSmpPktErr)) + strlcat(buf, "sdroppedsmppkt ", blen); + if (err & ERR_MASK(SendMaxPktLenErr)) + strlcat(buf, "smaxpktlen ", blen); + if (err & ERR_MASK(SendUnsupportedVLErr)) + strlcat(buf, "sunsupVL ", blen); + if (err & ERR_MASK(InvalidAddrErr)) + strlcat(buf, "invalidaddr ", blen); + if (err & ERR_MASK(RcvEgrFullErr)) + strlcat(buf, "rcvegrfull ", blen); + if (err & ERR_MASK(RcvHdrFullErr)) + strlcat(buf, "rcvhdrfull ", blen); + if (err & ERR_MASK(IBStatusChanged)) + strlcat(buf, "ibcstatuschg ", blen); + if (err & ERR_MASK(RcvIBLostLinkErr)) + strlcat(buf, "riblostlink ", blen); + if (err & ERR_MASK(HardwareErr)) + strlcat(buf, "hardware ", blen); + if (err & ERR_MASK(ResetNegated)) + strlcat(buf, "reset ", blen); + if (err & QLOGIC_IB_E_SDMAERRS) + qib_decode_7220_sdma_errs(dd->pport, err, buf, blen); + if (err & ERR_MASK(InvalidEEPCmd)) + strlcat(buf, "invalideepromcmd ", blen); +done: + return iserr; +} + +static void reenable_7220_chase(unsigned long opaque) +{ + struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + ppd->cpspec->chase_timer.expires = 0; + qib_set_ib_7220_lstate(ppd, QLOGIC_IB_IBCC_LINKCMD_DOWN, + QLOGIC_IB_IBCC_LINKINITCMD_POLL); +} + +static void handle_7220_chase(struct qib_pportdata *ppd, u64 ibcst) +{ + u8 ibclt; + unsigned long tnow; + + ibclt = (u8)SYM_FIELD(ibcst, IBCStatus, LinkTrainingState); + + /* + * Detect and handle the state chase issue, where we can + * get stuck if we are unlucky on timing on both sides of + * the link. If we are, we disable, set a timer, and + * then re-enable. + */ + switch (ibclt) { + case IB_7220_LT_STATE_CFGRCVFCFG: + case IB_7220_LT_STATE_CFGWAITRMT: + case IB_7220_LT_STATE_TXREVLANES: + case IB_7220_LT_STATE_CFGENH: + tnow = jiffies; + if (ppd->cpspec->chase_end && + time_after(tnow, ppd->cpspec->chase_end)) { + ppd->cpspec->chase_end = 0; + qib_set_ib_7220_lstate(ppd, + QLOGIC_IB_IBCC_LINKCMD_DOWN, + QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + ppd->cpspec->chase_timer.expires = jiffies + + QIB_CHASE_DIS_TIME; + add_timer(&ppd->cpspec->chase_timer); + } else if (!ppd->cpspec->chase_end) + ppd->cpspec->chase_end = tnow + QIB_CHASE_TIME; + break; + + default: + ppd->cpspec->chase_end = 0; + break; + } +} + +static void handle_7220_errors(struct qib_devdata *dd, u64 errs) +{ + char *msg; + u64 ignore_this_time = 0; + u64 iserr = 0; + int log_idx; + struct qib_pportdata *ppd = dd->pport; + u64 mask; + + /* don't report errors that are masked */ + errs &= dd->cspec->errormask; + msg = dd->cspec->emsgbuf; + + /* do these first, they are most important */ + if (errs & ERR_MASK(HardwareErr)) + qib_7220_handle_hwerrors(dd, msg, sizeof dd->cspec->emsgbuf); + else + for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx) + if (errs & dd->eep_st_masks[log_idx].errs_to_log) + qib_inc_eeprom_err(dd, log_idx, 1); + + if (errs & QLOGIC_IB_E_SDMAERRS) + sdma_7220_errors(ppd, errs); + + if (errs & ~IB_E_BITSEXTANT) + qib_dev_err(dd, "error interrupt with unknown errors " + "%llx set\n", (unsigned long long) + (errs & ~IB_E_BITSEXTANT)); + + if (errs & E_SUM_ERRS) { + qib_disarm_7220_senderrbufs(ppd); + if ((errs & E_SUM_LINK_PKTERRS) && + !(ppd->lflags & QIBL_LINKACTIVE)) { + /* + * This can happen when trying to bring the link + * up, but the IB link changes state at the "wrong" + * time. The IB logic then complains that the packet + * isn't valid. We don't want to confuse people, so + * we just don't print them, except at debug + */ + ignore_this_time = errs & E_SUM_LINK_PKTERRS; + } + } else if ((errs & E_SUM_LINK_PKTERRS) && + !(ppd->lflags & QIBL_LINKACTIVE)) { + /* + * This can happen when SMA is trying to bring the link + * up, but the IB link changes state at the "wrong" time. + * The IB logic then complains that the packet isn't + * valid. We don't want to confuse people, so we just + * don't print them, except at debug + */ + ignore_this_time = errs & E_SUM_LINK_PKTERRS; + } + + qib_write_kreg(dd, kr_errclear, errs); + + errs &= ~ignore_this_time; + if (!errs) + goto done; + + /* + * The ones we mask off are handled specially below + * or above. Also mask SDMADISABLED by default as it + * is too chatty. + */ + mask = ERR_MASK(IBStatusChanged) | + ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr) | + ERR_MASK(HardwareErr) | ERR_MASK(SDmaDisabledErr); + + qib_decode_7220_err(dd, msg, sizeof dd->cspec->emsgbuf, errs & ~mask); + + if (errs & E_SUM_PKTERRS) + qib_stats.sps_rcverrs++; + if (errs & E_SUM_ERRS) + qib_stats.sps_txerrs++; + iserr = errs & ~(E_SUM_PKTERRS | QLOGIC_IB_E_PKTERRS | + ERR_MASK(SDmaDisabledErr)); + + if (errs & ERR_MASK(IBStatusChanged)) { + u64 ibcs; + + ibcs = qib_read_kreg64(dd, kr_ibcstatus); + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) + handle_7220_chase(ppd, ibcs); + + /* Update our picture of width and speed from chip */ + ppd->link_width_active = + ((ibcs >> IBA7220_LINKWIDTH_SHIFT) & 1) ? + IB_WIDTH_4X : IB_WIDTH_1X; + ppd->link_speed_active = + ((ibcs >> IBA7220_LINKSPEED_SHIFT) & 1) ? + QIB_IB_DDR : QIB_IB_SDR; + + /* + * Since going into a recovery state causes the link state + * to go down and since recovery is transitory, it is better + * if we "miss" ever seeing the link training state go into + * recovery (i.e., ignore this transition for link state + * special handling purposes) without updating lastibcstat. + */ + if (qib_7220_phys_portstate(ibcs) != + IB_PHYSPORTSTATE_LINK_ERR_RECOVER) + qib_handle_e_ibstatuschanged(ppd, ibcs); + } + + if (errs & ERR_MASK(ResetNegated)) { + qib_dev_err(dd, "Got reset, requires re-init " + "(unload and reload driver)\n"); + dd->flags &= ~QIB_INITTED; /* needs re-init */ + /* mark as having had error */ + *dd->devstatusp |= QIB_STATUS_HWERROR; + *dd->pport->statusp &= ~QIB_STATUS_IB_CONF; + } + + if (*msg && iserr) + qib_dev_porterr(dd, ppd->port, "%s error\n", msg); + + if (ppd->state_wanted & ppd->lflags) + wake_up_interruptible(&ppd->state_wait); + + /* + * If there were hdrq or egrfull errors, wake up any processes + * waiting in poll. We used to try to check which contexts had + * the overflow, but given the cost of that and the chip reads + * to support it, it's better to just wake everybody up if we + * get an overflow; waiters can poll again if it's not them. + */ + if (errs & (ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr))) { + qib_handle_urcv(dd, ~0U); + if (errs & ERR_MASK(RcvEgrFullErr)) + qib_stats.sps_buffull++; + else + qib_stats.sps_hdrfull++; + } +done: + return; +} + +/* enable/disable chip from delivering interrupts */ +static void qib_7220_set_intr_state(struct qib_devdata *dd, u32 enable) +{ + if (enable) { + if (dd->flags & QIB_BADINTR) + return; + qib_write_kreg(dd, kr_intmask, ~0ULL); + /* force re-interrupt of any pending interrupts. */ + qib_write_kreg(dd, kr_intclear, 0ULL); + } else + qib_write_kreg(dd, kr_intmask, 0ULL); +} + +/* + * Try to cleanup as much as possible for anything that might have gone + * wrong while in freeze mode, such as pio buffers being written by user + * processes (causing armlaunch), send errors due to going into freeze mode, + * etc., and try to avoid causing extra interrupts while doing so. + * Forcibly update the in-memory pioavail register copies after cleanup + * because the chip won't do it while in freeze mode (the register values + * themselves are kept correct). + * Make sure that we don't lose any important interrupts by using the chip + * feature that says that writing 0 to a bit in *clear that is set in + * *status will cause an interrupt to be generated again (if allowed by + * the *mask value). + * This is in chip-specific code because of all of the register accesses, + * even though the details are similar on most chips. + */ +static void qib_7220_clear_freeze(struct qib_devdata *dd) +{ + /* disable error interrupts, to avoid confusion */ + qib_write_kreg(dd, kr_errmask, 0ULL); + + /* also disable interrupts; errormask is sometimes overwriten */ + qib_7220_set_intr_state(dd, 0); + + qib_cancel_sends(dd->pport); + + /* clear the freeze, and be sure chip saw it */ + qib_write_kreg(dd, kr_control, dd->control); + qib_read_kreg32(dd, kr_scratch); + + /* force in-memory update now we are out of freeze */ + qib_force_pio_avail_update(dd); + + /* + * force new interrupt if any hwerr, error or interrupt bits are + * still set, and clear "safe" send packet errors related to freeze + * and cancelling sends. Re-enable error interrupts before possible + * force of re-interrupt on pending interrupts. + */ + qib_write_kreg(dd, kr_hwerrclear, 0ULL); + qib_write_kreg(dd, kr_errclear, E_SPKT_ERRS_IGNORE); + qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); + qib_7220_set_intr_state(dd, 1); +} + +/** + * qib_7220_handle_hwerrors - display hardware errors. + * @dd: the qlogic_ib device + * @msg: the output buffer + * @msgl: the size of the output buffer + * + * Use same msg buffer as regular errors to avoid excessive stack + * use. Most hardware errors are catastrophic, but for right now, + * we'll print them and continue. We reuse the same message buffer as + * handle_7220_errors() to avoid excessive stack usage. + */ +static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, + size_t msgl) +{ + u64 hwerrs; + u32 bits, ctrl; + int isfatal = 0; + char *bitsmsg; + int log_idx; + + hwerrs = qib_read_kreg64(dd, kr_hwerrstatus); + if (!hwerrs) + goto bail; + if (hwerrs == ~0ULL) { + qib_dev_err(dd, "Read of hardware error status failed " + "(all bits set); ignoring\n"); + goto bail; + } + qib_stats.sps_hwerrs++; + + /* + * Always clear the error status register, except MEMBISTFAIL, + * regardless of whether we continue or stop using the chip. + * We want that set so we know it failed, even across driver reload. + * We'll still ignore it in the hwerrmask. We do this partly for + * diagnostics, but also for support. + */ + qib_write_kreg(dd, kr_hwerrclear, + hwerrs & ~HWE_MASK(PowerOnBISTFailed)); + + hwerrs &= dd->cspec->hwerrmask; + + /* We log some errors to EEPROM, check if we have any of those. */ + for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx) + if (hwerrs & dd->eep_st_masks[log_idx].hwerrs_to_log) + qib_inc_eeprom_err(dd, log_idx, 1); + if (hwerrs & ~(TXEMEMPARITYERR_PIOBUF | TXEMEMPARITYERR_PIOPBC | + RXE_PARITY)) + qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx " + "(cleared)\n", (unsigned long long) hwerrs); + + if (hwerrs & ~IB_HWE_BITSEXTANT) + qib_dev_err(dd, "hwerror interrupt with unknown errors " + "%llx set\n", (unsigned long long) + (hwerrs & ~IB_HWE_BITSEXTANT)); + + if (hwerrs & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR) + qib_sd7220_clr_ibpar(dd); + + ctrl = qib_read_kreg32(dd, kr_control); + if ((ctrl & QLOGIC_IB_C_FREEZEMODE) && !dd->diag_client) { + /* + * Parity errors in send memory are recoverable by h/w + * just do housekeeping, exit freeze mode and continue. + */ + if (hwerrs & (TXEMEMPARITYERR_PIOBUF | + TXEMEMPARITYERR_PIOPBC)) { + qib_7220_txe_recover(dd); + hwerrs &= ~(TXEMEMPARITYERR_PIOBUF | + TXEMEMPARITYERR_PIOPBC); + } + if (hwerrs) + isfatal = 1; + else + qib_7220_clear_freeze(dd); + } + + *msg = '\0'; + + if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { + isfatal = 1; + strlcat(msg, "[Memory BIST test failed, " + "InfiniPath hardware unusable]", msgl); + /* ignore from now on, so disable until driver reloaded */ + dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + } + + qib_format_hwerrors(hwerrs, qib_7220_hwerror_msgs, + ARRAY_SIZE(qib_7220_hwerror_msgs), msg, msgl); + + bitsmsg = dd->cspec->bitsmsgbuf; + if (hwerrs & (QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK << + QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT)) { + bits = (u32) ((hwerrs >> + QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT) & + QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK); + snprintf(bitsmsg, sizeof dd->cspec->bitsmsgbuf, + "[PCIe Mem Parity Errs %x] ", bits); + strlcat(msg, bitsmsg, msgl); + } + +#define _QIB_PLL_FAIL (QLOGIC_IB_HWE_COREPLL_FBSLIP | \ + QLOGIC_IB_HWE_COREPLL_RFSLIP) + + if (hwerrs & _QIB_PLL_FAIL) { + isfatal = 1; + snprintf(bitsmsg, sizeof dd->cspec->bitsmsgbuf, + "[PLL failed (%llx), InfiniPath hardware unusable]", + (unsigned long long) hwerrs & _QIB_PLL_FAIL); + strlcat(msg, bitsmsg, msgl); + /* ignore from now on, so disable until driver reloaded */ + dd->cspec->hwerrmask &= ~(hwerrs & _QIB_PLL_FAIL); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + } + + if (hwerrs & QLOGIC_IB_HWE_SERDESPLLFAILED) { + /* + * If it occurs, it is left masked since the eternal + * interface is unused. + */ + dd->cspec->hwerrmask &= ~QLOGIC_IB_HWE_SERDESPLLFAILED; + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + } + + qib_dev_err(dd, "%s hardware error\n", msg); + + if (isfatal && !dd->diag_client) { + qib_dev_err(dd, "Fatal Hardware Error, no longer" + " usable, SN %.16s\n", dd->serial); + /* + * For /sys status file and user programs to print; if no + * trailing brace is copied, we'll know it was truncated. + */ + if (dd->freezemsg) + snprintf(dd->freezemsg, dd->freezelen, + "{%s}", msg); + qib_disable_after_error(dd); + } +bail:; +} + +/** + * qib_7220_init_hwerrors - enable hardware errors + * @dd: the qlogic_ib device + * + * now that we have finished initializing everything that might reasonably + * cause a hardware error, and cleared those errors bits as they occur, + * we can enable hardware errors in the mask (potentially enabling + * freeze mode), and enable hardware errors as errors (along with + * everything else) in errormask + */ +static void qib_7220_init_hwerrors(struct qib_devdata *dd) +{ + u64 val; + u64 extsval; + + extsval = qib_read_kreg64(dd, kr_extstatus); + + if (!(extsval & (QLOGIC_IB_EXTS_MEMBIST_ENDTEST | + QLOGIC_IB_EXTS_MEMBIST_DISABLED))) + qib_dev_err(dd, "MemBIST did not complete!\n"); + if (extsval & QLOGIC_IB_EXTS_MEMBIST_DISABLED) + qib_devinfo(dd->pcidev, "MemBIST is disabled.\n"); + + val = ~0ULL; /* default to all hwerrors become interrupts, */ + + val &= ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR; + dd->cspec->hwerrmask = val; + + qib_write_kreg(dd, kr_hwerrclear, ~HWE_MASK(PowerOnBISTFailed)); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + + /* clear all */ + qib_write_kreg(dd, kr_errclear, ~0ULL); + /* enable errors that are masked, at least this first time. */ + qib_write_kreg(dd, kr_errmask, ~0ULL); + dd->cspec->errormask = qib_read_kreg64(dd, kr_errmask); + /* clear any interrupts up to this point (ints still not enabled) */ + qib_write_kreg(dd, kr_intclear, ~0ULL); +} + +/* + * Disable and enable the armlaunch error. Used for PIO bandwidth testing + * on chips that are count-based, rather than trigger-based. There is no + * reference counting, but that's also fine, given the intended use. + * Only chip-specific because it's all register accesses + */ +static void qib_set_7220_armlaunch(struct qib_devdata *dd, u32 enable) +{ + if (enable) { + qib_write_kreg(dd, kr_errclear, ERR_MASK(SendPioArmLaunchErr)); + dd->cspec->errormask |= ERR_MASK(SendPioArmLaunchErr); + } else + dd->cspec->errormask &= ~ERR_MASK(SendPioArmLaunchErr); + qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); +} + +/* + * Formerly took parameter in pre-shifted, + * pre-merged form with LinkCmd and LinkInitCmd + * together, and assuming the zero was NOP. + */ +static void qib_set_ib_7220_lstate(struct qib_pportdata *ppd, u16 linkcmd, + u16 linitcmd) +{ + u64 mod_wd; + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + + if (linitcmd == QLOGIC_IB_IBCC_LINKINITCMD_DISABLE) { + /* + * If we are told to disable, note that so link-recovery + * code does not attempt to bring us back up. + */ + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_LINK_DISABLED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } else if (linitcmd || linkcmd == QLOGIC_IB_IBCC_LINKCMD_DOWN) { + /* + * Any other linkinitcmd will lead to LINKDOWN and then + * to INIT (if all is well), so clear flag to let + * link-recovery code attempt to bring us back up. + */ + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_LINK_DISABLED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } + + mod_wd = (linkcmd << IBA7220_IBCC_LINKCMD_SHIFT) | + (linitcmd << QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + + qib_write_kreg(dd, kr_ibcctrl, ppd->cpspec->ibcctrl | mod_wd); + /* write to chip to prevent back-to-back writes of ibc reg */ + qib_write_kreg(dd, kr_scratch, 0); +} + +/* + * All detailed interaction with the SerDes has been moved to qib_sd7220.c + * + * The portion of IBA7220-specific bringup_serdes() that actually deals with + * registers and memory within the SerDes itself is qib_sd7220_init(). + */ + +/** + * qib_7220_bringup_serdes - bring up the serdes + * @ppd: physical port on the qlogic_ib device + */ +static int qib_7220_bringup_serdes(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + u64 val, prev_val, guid, ibc; + int ret = 0; + + /* Put IBC in reset, sends disabled */ + dd->control &= ~QLOGIC_IB_C_LINKENABLE; + qib_write_kreg(dd, kr_control, 0ULL); + + if (qib_compat_ddr_negotiate) { + ppd->cpspec->ibdeltainprog = 1; + ppd->cpspec->ibsymsnap = read_7220_creg32(dd, cr_ibsymbolerr); + ppd->cpspec->iblnkerrsnap = + read_7220_creg32(dd, cr_iblinkerrrecov); + } + + /* flowcontrolwatermark is in units of KBytes */ + ibc = 0x5ULL << SYM_LSB(IBCCtrl, FlowCtrlWaterMark); + /* + * How often flowctrl sent. More or less in usecs; balance against + * watermark value, so that in theory senders always get a flow + * control update in time to not let the IB link go idle. + */ + ibc |= 0x3ULL << SYM_LSB(IBCCtrl, FlowCtrlPeriod); + /* max error tolerance */ + ibc |= 0xfULL << SYM_LSB(IBCCtrl, PhyerrThreshold); + /* use "real" buffer space for */ + ibc |= 4ULL << SYM_LSB(IBCCtrl, CreditScale); + /* IB credit flow control. */ + ibc |= 0xfULL << SYM_LSB(IBCCtrl, OverrunThreshold); + /* + * set initial max size pkt IBC will send, including ICRC; it's the + * PIO buffer size in dwords, less 1; also see qib_set_mtu() + */ + ibc |= ((u64)(ppd->ibmaxlen >> 2) + 1) << SYM_LSB(IBCCtrl, MaxPktLen); + ppd->cpspec->ibcctrl = ibc; /* without linkcmd or linkinitcmd! */ + + /* initially come up waiting for TS1, without sending anything. */ + val = ppd->cpspec->ibcctrl | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE << + QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + qib_write_kreg(dd, kr_ibcctrl, val); + + if (!ppd->cpspec->ibcddrctrl) { + /* not on re-init after reset */ + ppd->cpspec->ibcddrctrl = qib_read_kreg64(dd, kr_ibcddrctrl); + + if (ppd->link_speed_enabled == (QIB_IB_SDR | QIB_IB_DDR)) + ppd->cpspec->ibcddrctrl |= + IBA7220_IBC_SPEED_AUTONEG_MASK | + IBA7220_IBC_IBTA_1_2_MASK; + else + ppd->cpspec->ibcddrctrl |= + ppd->link_speed_enabled == QIB_IB_DDR ? + IBA7220_IBC_SPEED_DDR : IBA7220_IBC_SPEED_SDR; + if ((ppd->link_width_enabled & (IB_WIDTH_1X | IB_WIDTH_4X)) == + (IB_WIDTH_1X | IB_WIDTH_4X)) + ppd->cpspec->ibcddrctrl |= IBA7220_IBC_WIDTH_AUTONEG; + else + ppd->cpspec->ibcddrctrl |= + ppd->link_width_enabled == IB_WIDTH_4X ? + IBA7220_IBC_WIDTH_4X_ONLY : + IBA7220_IBC_WIDTH_1X_ONLY; + + /* always enable these on driver reload, not sticky */ + ppd->cpspec->ibcddrctrl |= + IBA7220_IBC_RXPOL_MASK << IBA7220_IBC_RXPOL_SHIFT; + ppd->cpspec->ibcddrctrl |= + IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT; + + /* enable automatic lane reversal detection for receive */ + ppd->cpspec->ibcddrctrl |= IBA7220_IBC_LANE_REV_SUPPORTED; + } else + /* write to chip to prevent back-to-back writes of ibc reg */ + qib_write_kreg(dd, kr_scratch, 0); + + qib_write_kreg(dd, kr_ibcddrctrl, ppd->cpspec->ibcddrctrl); + qib_write_kreg(dd, kr_scratch, 0); + + qib_write_kreg(dd, kr_ncmodectrl, 0Ull); + qib_write_kreg(dd, kr_scratch, 0); + + ret = qib_sd7220_init(dd); + + val = qib_read_kreg64(dd, kr_xgxs_cfg); + prev_val = val; + val |= QLOGIC_IB_XGXS_FC_SAFE; + if (val != prev_val) { + qib_write_kreg(dd, kr_xgxs_cfg, val); + qib_read_kreg32(dd, kr_scratch); + } + if (val & QLOGIC_IB_XGXS_RESET) + val &= ~QLOGIC_IB_XGXS_RESET; + if (val != prev_val) + qib_write_kreg(dd, kr_xgxs_cfg, val); + + /* first time through, set port guid */ + if (!ppd->guid) + ppd->guid = dd->base_guid; + guid = be64_to_cpu(ppd->guid); + + qib_write_kreg(dd, kr_hrtbt_guid, guid); + if (!ret) { + dd->control |= QLOGIC_IB_C_LINKENABLE; + qib_write_kreg(dd, kr_control, dd->control); + } else + /* write to chip to prevent back-to-back writes of ibc reg */ + qib_write_kreg(dd, kr_scratch, 0); + return ret; +} + +/** + * qib_7220_quiet_serdes - set serdes to txidle + * @ppd: physical port of the qlogic_ib device + * Called when driver is being unloaded + */ +static void qib_7220_quiet_serdes(struct qib_pportdata *ppd) +{ + u64 val; + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + + /* disable IBC */ + dd->control &= ~QLOGIC_IB_C_LINKENABLE; + qib_write_kreg(dd, kr_control, + dd->control | QLOGIC_IB_C_FREEZEMODE); + + ppd->cpspec->chase_end = 0; + if (ppd->cpspec->chase_timer.data) /* if initted */ + del_timer_sync(&ppd->cpspec->chase_timer); + + if (ppd->cpspec->ibsymdelta || ppd->cpspec->iblnkerrdelta || + ppd->cpspec->ibdeltainprog) { + u64 diagc; + + /* enable counter writes */ + diagc = qib_read_kreg64(dd, kr_hwdiagctrl); + qib_write_kreg(dd, kr_hwdiagctrl, + diagc | SYM_MASK(HwDiagCtrl, CounterWrEnable)); + + if (ppd->cpspec->ibsymdelta || ppd->cpspec->ibdeltainprog) { + val = read_7220_creg32(dd, cr_ibsymbolerr); + if (ppd->cpspec->ibdeltainprog) + val -= val - ppd->cpspec->ibsymsnap; + val -= ppd->cpspec->ibsymdelta; + write_7220_creg(dd, cr_ibsymbolerr, val); + } + if (ppd->cpspec->iblnkerrdelta || ppd->cpspec->ibdeltainprog) { + val = read_7220_creg32(dd, cr_iblinkerrrecov); + if (ppd->cpspec->ibdeltainprog) + val -= val - ppd->cpspec->iblnkerrsnap; + val -= ppd->cpspec->iblnkerrdelta; + write_7220_creg(dd, cr_iblinkerrrecov, val); + } + + /* and disable counter writes */ + qib_write_kreg(dd, kr_hwdiagctrl, diagc); + } + qib_set_ib_7220_lstate(ppd, 0, QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + wake_up(&ppd->cpspec->autoneg_wait); + cancel_delayed_work_sync(&ppd->cpspec->autoneg_work); + + shutdown_7220_relock_poll(ppd->dd); + val = qib_read_kreg64(ppd->dd, kr_xgxs_cfg); + val |= QLOGIC_IB_XGXS_RESET; + qib_write_kreg(ppd->dd, kr_xgxs_cfg, val); +} + +/** + * qib_setup_7220_setextled - set the state of the two external LEDs + * @dd: the qlogic_ib device + * @on: whether the link is up or not + * + * The exact combo of LEDs if on is true is determined by looking + * at the ibcstatus. + * + * These LEDs indicate the physical and logical state of IB link. + * For this chip (at least with recommended board pinouts), LED1 + * is Yellow (logical state) and LED2 is Green (physical state), + * + * Note: We try to match the Mellanox HCA LED behavior as best + * we can. Green indicates physical link state is OK (something is + * plugged in, and we can train). + * Amber indicates the link is logically up (ACTIVE). + * Mellanox further blinks the amber LED to indicate data packet + * activity, but we have no hardware support for that, so it would + * require waking up every 10-20 msecs and checking the counters + * on the chip, and then turning the LED off if appropriate. That's + * visible overhead, so not something we will do. + * + */ +static void qib_setup_7220_setextled(struct qib_pportdata *ppd, u32 on) +{ + struct qib_devdata *dd = ppd->dd; + u64 extctl, ledblink = 0, val, lst, ltst; + unsigned long flags; + + /* + * The diags use the LED to indicate diag info, so we leave + * the external LED alone when the diags are running. + */ + if (dd->diag_client) + return; + + if (ppd->led_override) { + ltst = (ppd->led_override & QIB_LED_PHYS) ? + IB_PHYSPORTSTATE_LINKUP : IB_PHYSPORTSTATE_DISABLED, + lst = (ppd->led_override & QIB_LED_LOG) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; + } else if (on) { + val = qib_read_kreg64(dd, kr_ibcstatus); + ltst = qib_7220_phys_portstate(val); + lst = qib_7220_iblink_state(val); + } else { + ltst = 0; + lst = 0; + } + + spin_lock_irqsave(&dd->cspec->gpio_lock, flags); + extctl = dd->cspec->extctrl & ~(SYM_MASK(EXTCtrl, LEDPriPortGreenOn) | + SYM_MASK(EXTCtrl, LEDPriPortYellowOn)); + if (ltst == IB_PHYSPORTSTATE_LINKUP) { + extctl |= SYM_MASK(EXTCtrl, LEDPriPortGreenOn); + /* + * counts are in chip clock (4ns) periods. + * This is 1/16 sec (66.6ms) on, + * 3/16 sec (187.5 ms) off, with packets rcvd + */ + ledblink = ((66600 * 1000UL / 4) << IBA7220_LEDBLINK_ON_SHIFT) + | ((187500 * 1000UL / 4) << IBA7220_LEDBLINK_OFF_SHIFT); + } + if (lst == IB_PORT_ACTIVE) + extctl |= SYM_MASK(EXTCtrl, LEDPriPortYellowOn); + dd->cspec->extctrl = extctl; + qib_write_kreg(dd, kr_extctrl, extctl); + spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); + + if (ledblink) /* blink the LED on packet receive */ + qib_write_kreg(dd, kr_rcvpktledcnt, ledblink); +} + +static void qib_7220_free_irq(struct qib_devdata *dd) +{ + if (dd->cspec->irq) { + free_irq(dd->cspec->irq, dd); + dd->cspec->irq = 0; + } + qib_nomsi(dd); +} + +/* + * qib_setup_7220_cleanup - clean up any per-chip chip-specific stuff + * @dd: the qlogic_ib device + * + * This is called during driver unload. + * + */ +static void qib_setup_7220_cleanup(struct qib_devdata *dd) +{ + qib_7220_free_irq(dd); + kfree(dd->cspec->cntrs); + kfree(dd->cspec->portcntrs); +} + +/* + * This is only called for SDmaInt. + * SDmaDisabled is handled on the error path. + */ +static void sdma_7220_intr(struct qib_pportdata *ppd, u64 istat) +{ + unsigned long flags; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + + switch (ppd->sdma_state.current_state) { + case qib_sdma_state_s00_hw_down: + break; + + case qib_sdma_state_s10_hw_start_up_wait: + __qib_sdma_process_event(ppd, qib_sdma_event_e20_hw_started); + break; + + case qib_sdma_state_s20_idle: + break; + + case qib_sdma_state_s30_sw_clean_up_wait: + break; + + case qib_sdma_state_s40_hw_clean_up_wait: + break; + + case qib_sdma_state_s50_hw_halt_wait: + __qib_sdma_process_event(ppd, qib_sdma_event_e60_hw_halted); + break; + + case qib_sdma_state_s99_running: + /* too chatty to print here */ + __qib_sdma_intr(ppd); + break; + } + spin_unlock_irqrestore(&ppd->sdma_lock, flags); +} + +static void qib_wantpiobuf_7220_intr(struct qib_devdata *dd, u32 needint) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->sendctrl_lock, flags); + if (needint) { + if (!(dd->sendctrl & SYM_MASK(SendCtrl, SendBufAvailUpd))) + goto done; + /* + * blip the availupd off, next write will be on, so + * we ensure an avail update, regardless of threshold or + * buffers becoming free, whenever we want an interrupt + */ + qib_write_kreg(dd, kr_sendctrl, dd->sendctrl & + ~SYM_MASK(SendCtrl, SendBufAvailUpd)); + qib_write_kreg(dd, kr_scratch, 0ULL); + dd->sendctrl |= SYM_MASK(SendCtrl, SendIntBufAvail); + } else + dd->sendctrl &= ~SYM_MASK(SendCtrl, SendIntBufAvail); + qib_write_kreg(dd, kr_sendctrl, dd->sendctrl); + qib_write_kreg(dd, kr_scratch, 0ULL); +done: + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); +} + +/* + * Handle errors and unusual events first, separate function + * to improve cache hits for fast path interrupt handling. + */ +static noinline void unlikely_7220_intr(struct qib_devdata *dd, u64 istat) +{ + if (unlikely(istat & ~QLOGIC_IB_I_BITSEXTANT)) + qib_dev_err(dd, + "interrupt with unknown interrupts %Lx set\n", + istat & ~QLOGIC_IB_I_BITSEXTANT); + + if (istat & QLOGIC_IB_I_GPIO) { + u32 gpiostatus; + + /* + * Boards for this chip currently don't use GPIO interrupts, + * so clear by writing GPIOstatus to GPIOclear, and complain + * to alert developer. To avoid endless repeats, clear + * the bits in the mask, since there is some kind of + * programming error or chip problem. + */ + gpiostatus = qib_read_kreg32(dd, kr_gpio_status); + /* + * In theory, writing GPIOstatus to GPIOclear could + * have a bad side-effect on some diagnostic that wanted + * to poll for a status-change, but the various shadows + * make that problematic at best. Diags will just suppress + * all GPIO interrupts during such tests. + */ + qib_write_kreg(dd, kr_gpio_clear, gpiostatus); + + if (gpiostatus) { + const u32 mask = qib_read_kreg32(dd, kr_gpio_mask); + u32 gpio_irq = mask & gpiostatus; + + /* + * A bit set in status and (chip) Mask register + * would cause an interrupt. Since we are not + * expecting any, report it. Also check that the + * chip reflects our shadow, report issues, + * and refresh from the shadow. + */ + /* + * Clear any troublemakers, and update chip + * from shadow + */ + dd->cspec->gpio_mask &= ~gpio_irq; + qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask); + } + } + + if (istat & QLOGIC_IB_I_ERROR) { + u64 estat; + + qib_stats.sps_errints++; + estat = qib_read_kreg64(dd, kr_errstatus); + if (!estat) + qib_devinfo(dd->pcidev, "error interrupt (%Lx), " + "but no error bits set!\n", istat); + else + handle_7220_errors(dd, estat); + } +} + +static irqreturn_t qib_7220intr(int irq, void *data) +{ + struct qib_devdata *dd = data; + irqreturn_t ret; + u64 istat; + u64 ctxtrbits; + u64 rmask; + unsigned i; + + if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) { + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + ret = IRQ_HANDLED; + goto bail; + } + + istat = qib_read_kreg64(dd, kr_intstatus); + + if (unlikely(!istat)) { + ret = IRQ_NONE; /* not our interrupt, or already handled */ + goto bail; + } + if (unlikely(istat == -1)) { + qib_bad_intrstatus(dd); + /* don't know if it was our interrupt or not */ + ret = IRQ_NONE; + goto bail; + } + + qib_stats.sps_ints++; + if (dd->int_counter != (u32) -1) + dd->int_counter++; + + if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT | + QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR))) + unlikely_7220_intr(dd, istat); + + /* + * Clear the interrupt bits we found set, relatively early, so we + * "know" know the chip will have seen this by the time we process + * the queue, and will re-interrupt if necessary. The processor + * itself won't take the interrupt again until we return. + */ + qib_write_kreg(dd, kr_intclear, istat); + + /* + * Handle kernel receive queues before checking for pio buffers + * available since receives can overflow; piobuf waiters can afford + * a few extra cycles, since they were waiting anyway. + */ + ctxtrbits = istat & + ((QLOGIC_IB_I_RCVAVAIL_MASK << QLOGIC_IB_I_RCVAVAIL_SHIFT) | + (QLOGIC_IB_I_RCVURG_MASK << QLOGIC_IB_I_RCVURG_SHIFT)); + if (ctxtrbits) { + rmask = (1ULL << QLOGIC_IB_I_RCVAVAIL_SHIFT) | + (1ULL << QLOGIC_IB_I_RCVURG_SHIFT); + for (i = 0; i < dd->first_user_ctxt; i++) { + if (ctxtrbits & rmask) { + ctxtrbits &= ~rmask; + qib_kreceive(dd->rcd[i], NULL, NULL); + } + rmask <<= 1; + } + if (ctxtrbits) { + ctxtrbits = + (ctxtrbits >> QLOGIC_IB_I_RCVAVAIL_SHIFT) | + (ctxtrbits >> QLOGIC_IB_I_RCVURG_SHIFT); + qib_handle_urcv(dd, ctxtrbits); + } + } + + /* only call for SDmaInt */ + if (istat & QLOGIC_IB_I_SDMAINT) + sdma_7220_intr(dd->pport, istat); + + if ((istat & QLOGIC_IB_I_SPIOBUFAVAIL) && (dd->flags & QIB_INITTED)) + qib_ib_piobufavail(dd); + + ret = IRQ_HANDLED; +bail: + return ret; +} + +/* + * Set up our chip-specific interrupt handler. + * The interrupt type has already been setup, so + * we just need to do the registration and error checking. + * If we are using MSI interrupts, we may fall back to + * INTx later, if the interrupt handler doesn't get called + * within 1/2 second (see verify_interrupt()). + */ +static void qib_setup_7220_interrupt(struct qib_devdata *dd) +{ + if (!dd->cspec->irq) + qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " + "work\n"); + else { + int ret = request_irq(dd->cspec->irq, qib_7220intr, + dd->msi_lo ? 0 : IRQF_SHARED, + QIB_DRV_NAME, dd); + + if (ret) + qib_dev_err(dd, "Couldn't setup %s interrupt " + "(irq=%d): %d\n", dd->msi_lo ? + "MSI" : "INTx", dd->cspec->irq, ret); + } +} + +/** + * qib_7220_boardname - fill in the board name + * @dd: the qlogic_ib device + * + * info is based on the board revision register + */ +static void qib_7220_boardname(struct qib_devdata *dd) +{ + char *n; + u32 boardid, namelen; + + boardid = SYM_FIELD(dd->revision, Revision, + BoardID); + + switch (boardid) { + case 1: + n = "InfiniPath_QLE7240"; + break; + case 2: + n = "InfiniPath_QLE7280"; + break; + default: + qib_dev_err(dd, "Unknown 7220 board with ID %u\n", boardid); + n = "Unknown_InfiniPath_7220"; + break; + } + + namelen = strlen(n) + 1; + dd->boardname = kmalloc(namelen, GFP_KERNEL); + if (!dd->boardname) + qib_dev_err(dd, "Failed allocation for board name: %s\n", n); + else + snprintf(dd->boardname, namelen, "%s", n); + + if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2) + qib_dev_err(dd, "Unsupported InfiniPath hardware " + "revision %u.%u!\n", + dd->majrev, dd->minrev); + + snprintf(dd->boardversion, sizeof(dd->boardversion), + "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", + QIB_CHIP_VERS_MAJ, QIB_CHIP_VERS_MIN, dd->boardname, + (unsigned)SYM_FIELD(dd->revision, Revision_R, Arch), + dd->majrev, dd->minrev, + (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); +} + +/* + * This routine sleeps, so it can only be called from user context, not + * from interrupt context. + */ +static int qib_setup_7220_reset(struct qib_devdata *dd) +{ + u64 val; + int i; + int ret; + u16 cmdval; + u8 int_line, clinesz; + unsigned long flags; + + qib_pcie_getcmd(dd, &cmdval, &int_line, &clinesz); + + /* Use dev_err so it shows up in logs, etc. */ + qib_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->unit); + + /* no interrupts till re-initted */ + qib_7220_set_intr_state(dd, 0); + + dd->pport->cpspec->ibdeltainprog = 0; + dd->pport->cpspec->ibsymdelta = 0; + dd->pport->cpspec->iblnkerrdelta = 0; + + /* + * Keep chip from being accessed until we are ready. Use + * writeq() directly, to allow the write even though QIB_PRESENT + * isn't set. + */ + dd->flags &= ~(QIB_INITTED | QIB_PRESENT); + dd->int_counter = 0; /* so we check interrupts work again */ + val = dd->control | QLOGIC_IB_C_RESET; + writeq(val, &dd->kregbase[kr_control]); + mb(); /* prevent compiler reordering around actual reset */ + + for (i = 1; i <= 5; i++) { + /* + * Allow MBIST, etc. to complete; longer on each retry. + * We sometimes get machine checks from bus timeout if no + * response, so for now, make it *really* long. + */ + msleep(1000 + (1 + i) * 2000); + + qib_pcie_reenable(dd, cmdval, int_line, clinesz); + + /* + * Use readq directly, so we don't need to mark it as PRESENT + * until we get a successful indication that all is well. + */ + val = readq(&dd->kregbase[kr_revision]); + if (val == dd->revision) { + dd->flags |= QIB_PRESENT; /* it's back */ + ret = qib_reinit_intr(dd); + goto bail; + } + } + ret = 0; /* failed */ + +bail: + if (ret) { + if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) + qib_dev_err(dd, "Reset failed to setup PCIe or " + "interrupts; continuing anyway\n"); + + /* hold IBC in reset, no sends, etc till later */ + qib_write_kreg(dd, kr_control, 0ULL); + + /* clear the reset error, init error/hwerror mask */ + qib_7220_init_hwerrors(dd); + + /* do setup similar to speed or link-width changes */ + if (dd->pport->cpspec->ibcddrctrl & IBA7220_IBC_IBTA_1_2_MASK) + dd->cspec->presets_needed = 1; + spin_lock_irqsave(&dd->pport->lflags_lock, flags); + dd->pport->lflags |= QIBL_IB_FORCE_NOTIFY; + dd->pport->lflags &= ~QIBL_IB_AUTONEG_FAILED; + spin_unlock_irqrestore(&dd->pport->lflags_lock, flags); + } + + return ret; +} + +/** + * qib_7220_put_tid - write a TID to the chip + * @dd: the qlogic_ib device + * @tidptr: pointer to the expected TID (in chip) to update + * @tidtype: 0 for eager, 1 for expected + * @pa: physical address of in memory buffer; tidinvalid if freeing + */ +static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, + u32 type, unsigned long pa) +{ + if (pa != dd->tidinvalid) { + u64 chippa = pa >> IBA7220_TID_PA_SHIFT; + + /* paranoia checks */ + if (pa != (chippa << IBA7220_TID_PA_SHIFT)) { + qib_dev_err(dd, "Physaddr %lx not 2KB aligned!\n", + pa); + return; + } + if (chippa >= (1UL << IBA7220_TID_SZ_SHIFT)) { + qib_dev_err(dd, "Physical page address 0x%lx " + "larger than supported\n", pa); + return; + } + + if (type == RCVHQ_RCV_TYPE_EAGER) + chippa |= dd->tidtemplate; + else /* for now, always full 4KB page */ + chippa |= IBA7220_TID_SZ_4K; + pa = chippa; + } + writeq(pa, tidptr); + mmiowb(); +} + +/** + * qib_7220_clear_tids - clear all TID entries for a ctxt, expected and eager + * @dd: the qlogic_ib device + * @ctxt: the ctxt + * + * clear all TID entries for a ctxt, expected and eager. + * Used from qib_close(). On this chip, TIDs are only 32 bits, + * not 64, but they are still on 64 bit boundaries, so tidbase + * is declared as u64 * for the pointer math, even though we write 32 bits + */ +static void qib_7220_clear_tids(struct qib_devdata *dd, + struct qib_ctxtdata *rcd) +{ + u64 __iomem *tidbase; + unsigned long tidinv; + u32 ctxt; + int i; + + if (!dd->kregbase || !rcd) + return; + + ctxt = rcd->ctxt; + + tidinv = dd->tidinvalid; + tidbase = (u64 __iomem *) + ((char __iomem *)(dd->kregbase) + + dd->rcvtidbase + + ctxt * dd->rcvtidcnt * sizeof(*tidbase)); + + for (i = 0; i < dd->rcvtidcnt; i++) + qib_7220_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED, + tidinv); + + tidbase = (u64 __iomem *) + ((char __iomem *)(dd->kregbase) + + dd->rcvegrbase + + rcd->rcvegr_tid_base * sizeof(*tidbase)); + + for (i = 0; i < rcd->rcvegrcnt; i++) + qib_7220_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER, + tidinv); +} + +/** + * qib_7220_tidtemplate - setup constants for TID updates + * @dd: the qlogic_ib device + * + * We setup stuff that we use a lot, to avoid calculating each time + */ +static void qib_7220_tidtemplate(struct qib_devdata *dd) +{ + if (dd->rcvegrbufsize == 2048) + dd->tidtemplate = IBA7220_TID_SZ_2K; + else if (dd->rcvegrbufsize == 4096) + dd->tidtemplate = IBA7220_TID_SZ_4K; + dd->tidinvalid = 0; +} + +/** + * qib_init_7220_get_base_info - set chip-specific flags for user code + * @rcd: the qlogic_ib ctxt + * @kbase: qib_base_info pointer + * + * We set the PCIE flag because the lower bandwidth on PCIe vs + * HyperTransport can affect some user packet algorithims. + */ +static int qib_7220_get_base_info(struct qib_ctxtdata *rcd, + struct qib_base_info *kinfo) +{ + kinfo->spi_runtime_flags |= QIB_RUNTIME_PCIE | + QIB_RUNTIME_NODMA_RTAIL | QIB_RUNTIME_SDMA; + + if (rcd->dd->flags & QIB_USE_SPCL_TRIG) + kinfo->spi_runtime_flags |= QIB_RUNTIME_SPECIAL_TRIGGER; + + return 0; +} + +static struct qib_message_header * +qib_7220_get_msgheader(struct qib_devdata *dd, __le32 *rhf_addr) +{ + u32 offset = qib_hdrget_offset(rhf_addr); + + return (struct qib_message_header *) + (rhf_addr - dd->rhf_offset + offset); +} + +static void qib_7220_config_ctxts(struct qib_devdata *dd) +{ + unsigned long flags; + u32 nchipctxts; + + nchipctxts = qib_read_kreg32(dd, kr_portcnt); + dd->cspec->numctxts = nchipctxts; + if (qib_n_krcv_queues > 1) { + dd->qpn_mask = 0x3e; + dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports; + if (dd->first_user_ctxt > nchipctxts) + dd->first_user_ctxt = nchipctxts; + } else + dd->first_user_ctxt = dd->num_pports; + dd->n_krcv_queues = dd->first_user_ctxt; + + if (!qib_cfgctxts) { + int nctxts = dd->first_user_ctxt + num_online_cpus(); + + if (nctxts <= 5) + dd->ctxtcnt = 5; + else if (nctxts <= 9) + dd->ctxtcnt = 9; + else if (nctxts <= nchipctxts) + dd->ctxtcnt = nchipctxts; + } else if (qib_cfgctxts <= nchipctxts) + dd->ctxtcnt = qib_cfgctxts; + if (!dd->ctxtcnt) /* none of the above, set to max */ + dd->ctxtcnt = nchipctxts; + + /* + * Chip can be configured for 5, 9, or 17 ctxts, and choice + * affects number of eager TIDs per ctxt (1K, 2K, 4K). + * Lock to be paranoid about later motion, etc. + */ + spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags); + if (dd->ctxtcnt > 9) + dd->rcvctrl |= 2ULL << IBA7220_R_CTXTCFG_SHIFT; + else if (dd->ctxtcnt > 5) + dd->rcvctrl |= 1ULL << IBA7220_R_CTXTCFG_SHIFT; + /* else configure for default 5 receive ctxts */ + if (dd->qpn_mask) + dd->rcvctrl |= 1ULL << QIB_7220_RcvCtrl_RcvQPMapEnable_LSB; + qib_write_kreg(dd, kr_rcvctrl, dd->rcvctrl); + spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags); + + /* kr_rcvegrcnt changes based on the number of contexts enabled */ + dd->cspec->rcvegrcnt = qib_read_kreg32(dd, kr_rcvegrcnt); + dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt, IBA7220_KRCVEGRCNT); +} + +static int qib_7220_get_ib_cfg(struct qib_pportdata *ppd, int which) +{ + int lsb, ret = 0; + u64 maskr; /* right-justified mask */ + + switch (which) { + case QIB_IB_CFG_LWID_ENB: /* Get allowed Link-width */ + ret = ppd->link_width_enabled; + goto done; + + case QIB_IB_CFG_LWID: /* Get currently active Link-width */ + ret = ppd->link_width_active; + goto done; + + case QIB_IB_CFG_SPD_ENB: /* Get allowed Link speeds */ + ret = ppd->link_speed_enabled; + goto done; + + case QIB_IB_CFG_SPD: /* Get current Link spd */ + ret = ppd->link_speed_active; + goto done; + + case QIB_IB_CFG_RXPOL_ENB: /* Get Auto-RX-polarity enable */ + lsb = IBA7220_IBC_RXPOL_SHIFT; + maskr = IBA7220_IBC_RXPOL_MASK; + break; + + case QIB_IB_CFG_LREV_ENB: /* Get Auto-Lane-reversal enable */ + lsb = IBA7220_IBC_LREV_SHIFT; + maskr = IBA7220_IBC_LREV_MASK; + break; + + case QIB_IB_CFG_LINKLATENCY: + ret = qib_read_kreg64(ppd->dd, kr_ibcddrstatus) + & IBA7220_DDRSTAT_LINKLAT_MASK; + goto done; + + case QIB_IB_CFG_OP_VLS: + ret = ppd->vls_operational; + goto done; + + case QIB_IB_CFG_VL_HIGH_CAP: + ret = 0; + goto done; + + case QIB_IB_CFG_VL_LOW_CAP: + ret = 0; + goto done; + + case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */ + ret = SYM_FIELD(ppd->cpspec->ibcctrl, IBCCtrl, + OverrunThreshold); + goto done; + + case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */ + ret = SYM_FIELD(ppd->cpspec->ibcctrl, IBCCtrl, + PhyerrThreshold); + goto done; + + case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */ + /* will only take effect when the link state changes */ + ret = (ppd->cpspec->ibcctrl & + SYM_MASK(IBCCtrl, LinkDownDefaultState)) ? + IB_LINKINITCMD_SLEEP : IB_LINKINITCMD_POLL; + goto done; + + case QIB_IB_CFG_HRTBT: /* Get Heartbeat off/enable/auto */ + lsb = IBA7220_IBC_HRTBT_SHIFT; + maskr = IBA7220_IBC_HRTBT_MASK; + break; + + case QIB_IB_CFG_PMA_TICKS: + /* + * 0x00 = 10x link transfer rate or 4 nsec. for 2.5Gbs + * Since the clock is always 250MHz, the value is 1 or 0. + */ + ret = (ppd->link_speed_active == QIB_IB_DDR); + goto done; + + default: + ret = -EINVAL; + goto done; + } + ret = (int)((ppd->cpspec->ibcddrctrl >> lsb) & maskr); +done: + return ret; +} + +static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) +{ + struct qib_devdata *dd = ppd->dd; + u64 maskr; /* right-justified mask */ + int lsb, ret = 0, setforce = 0; + u16 lcmd, licmd; + unsigned long flags; + u32 tmp = 0; + + switch (which) { + case QIB_IB_CFG_LIDLMC: + /* + * Set LID and LMC. Combined to avoid possible hazard + * caller puts LMC in 16MSbits, DLID in 16LSbits of val + */ + lsb = IBA7220_IBC_DLIDLMC_SHIFT; + maskr = IBA7220_IBC_DLIDLMC_MASK; + break; + + case QIB_IB_CFG_LWID_ENB: /* set allowed Link-width */ + /* + * As with speed, only write the actual register if + * the link is currently down, otherwise takes effect + * on next link change. + */ + ppd->link_width_enabled = val; + if (!(ppd->lflags & QIBL_LINKDOWN)) + goto bail; + /* + * We set the QIBL_IB_FORCE_NOTIFY bit so updown + * will get called because we want update + * link_width_active, and the change may not take + * effect for some time (if we are in POLL), so this + * flag will force the updown routine to be called + * on the next ibstatuschange down interrupt, even + * if it's not an down->up transition. + */ + val--; /* convert from IB to chip */ + maskr = IBA7220_IBC_WIDTH_MASK; + lsb = IBA7220_IBC_WIDTH_SHIFT; + setforce = 1; + break; + + case QIB_IB_CFG_SPD_ENB: /* set allowed Link speeds */ + /* + * If we turn off IB1.2, need to preset SerDes defaults, + * but not right now. Set a flag for the next time + * we command the link down. As with width, only write the + * actual register if the link is currently down, otherwise + * takes effect on next link change. Since setting is being + * explicitly requested (via MAD or sysfs), clear autoneg + * failure status if speed autoneg is enabled. + */ + ppd->link_speed_enabled = val; + if ((ppd->cpspec->ibcddrctrl & IBA7220_IBC_IBTA_1_2_MASK) && + !(val & (val - 1))) + dd->cspec->presets_needed = 1; + if (!(ppd->lflags & QIBL_LINKDOWN)) + goto bail; + /* + * We set the QIBL_IB_FORCE_NOTIFY bit so updown + * will get called because we want update + * link_speed_active, and the change may not take + * effect for some time (if we are in POLL), so this + * flag will force the updown routine to be called + * on the next ibstatuschange down interrupt, even + * if it's not an down->up transition. + */ + if (val == (QIB_IB_SDR | QIB_IB_DDR)) { + val = IBA7220_IBC_SPEED_AUTONEG_MASK | + IBA7220_IBC_IBTA_1_2_MASK; + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_AUTONEG_FAILED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } else + val = val == QIB_IB_DDR ? + IBA7220_IBC_SPEED_DDR : IBA7220_IBC_SPEED_SDR; + maskr = IBA7220_IBC_SPEED_AUTONEG_MASK | + IBA7220_IBC_IBTA_1_2_MASK; + /* IBTA 1.2 mode + speed bits are contiguous */ + lsb = SYM_LSB(IBCDDRCtrl, IB_ENHANCED_MODE); + setforce = 1; + break; + + case QIB_IB_CFG_RXPOL_ENB: /* set Auto-RX-polarity enable */ + lsb = IBA7220_IBC_RXPOL_SHIFT; + maskr = IBA7220_IBC_RXPOL_MASK; + break; + + case QIB_IB_CFG_LREV_ENB: /* set Auto-Lane-reversal enable */ + lsb = IBA7220_IBC_LREV_SHIFT; + maskr = IBA7220_IBC_LREV_MASK; + break; + + case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */ + maskr = SYM_FIELD(ppd->cpspec->ibcctrl, IBCCtrl, + OverrunThreshold); + if (maskr != val) { + ppd->cpspec->ibcctrl &= + ~SYM_MASK(IBCCtrl, OverrunThreshold); + ppd->cpspec->ibcctrl |= (u64) val << + SYM_LSB(IBCCtrl, OverrunThreshold); + qib_write_kreg(dd, kr_ibcctrl, ppd->cpspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + goto bail; + + case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */ + maskr = SYM_FIELD(ppd->cpspec->ibcctrl, IBCCtrl, + PhyerrThreshold); + if (maskr != val) { + ppd->cpspec->ibcctrl &= + ~SYM_MASK(IBCCtrl, PhyerrThreshold); + ppd->cpspec->ibcctrl |= (u64) val << + SYM_LSB(IBCCtrl, PhyerrThreshold); + qib_write_kreg(dd, kr_ibcctrl, ppd->cpspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + goto bail; + + case QIB_IB_CFG_PKEYS: /* update pkeys */ + maskr = (u64) ppd->pkeys[0] | ((u64) ppd->pkeys[1] << 16) | + ((u64) ppd->pkeys[2] << 32) | + ((u64) ppd->pkeys[3] << 48); + qib_write_kreg(dd, kr_partitionkey, maskr); + goto bail; + + case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */ + /* will only take effect when the link state changes */ + if (val == IB_LINKINITCMD_POLL) + ppd->cpspec->ibcctrl &= + ~SYM_MASK(IBCCtrl, LinkDownDefaultState); + else /* SLEEP */ + ppd->cpspec->ibcctrl |= + SYM_MASK(IBCCtrl, LinkDownDefaultState); + qib_write_kreg(dd, kr_ibcctrl, ppd->cpspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + goto bail; + + case QIB_IB_CFG_MTU: /* update the MTU in IBC */ + /* + * Update our housekeeping variables, and set IBC max + * size, same as init code; max IBC is max we allow in + * buffer, less the qword pbc, plus 1 for ICRC, in dwords + * Set even if it's unchanged, print debug message only + * on changes. + */ + val = (ppd->ibmaxlen >> 2) + 1; + ppd->cpspec->ibcctrl &= ~SYM_MASK(IBCCtrl, MaxPktLen); + ppd->cpspec->ibcctrl |= (u64)val << SYM_LSB(IBCCtrl, MaxPktLen); + qib_write_kreg(dd, kr_ibcctrl, ppd->cpspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + goto bail; + + case QIB_IB_CFG_LSTATE: /* set the IB link state */ + switch (val & 0xffff0000) { + case IB_LINKCMD_DOWN: + lcmd = QLOGIC_IB_IBCC_LINKCMD_DOWN; + if (!ppd->cpspec->ibdeltainprog && + qib_compat_ddr_negotiate) { + ppd->cpspec->ibdeltainprog = 1; + ppd->cpspec->ibsymsnap = + read_7220_creg32(dd, cr_ibsymbolerr); + ppd->cpspec->iblnkerrsnap = + read_7220_creg32(dd, cr_iblinkerrrecov); + } + break; + + case IB_LINKCMD_ARMED: + lcmd = QLOGIC_IB_IBCC_LINKCMD_ARMED; + break; + + case IB_LINKCMD_ACTIVE: + lcmd = QLOGIC_IB_IBCC_LINKCMD_ACTIVE; + break; + + default: + ret = -EINVAL; + qib_dev_err(dd, "bad linkcmd req 0x%x\n", val >> 16); + goto bail; + } + switch (val & 0xffff) { + case IB_LINKINITCMD_NOP: + licmd = 0; + break; + + case IB_LINKINITCMD_POLL: + licmd = QLOGIC_IB_IBCC_LINKINITCMD_POLL; + break; + + case IB_LINKINITCMD_SLEEP: + licmd = QLOGIC_IB_IBCC_LINKINITCMD_SLEEP; + break; + + case IB_LINKINITCMD_DISABLE: + licmd = QLOGIC_IB_IBCC_LINKINITCMD_DISABLE; + ppd->cpspec->chase_end = 0; + /* + * stop state chase counter and timer, if running. + * wait forpending timer, but don't clear .data (ppd)! + */ + if (ppd->cpspec->chase_timer.expires) { + del_timer_sync(&ppd->cpspec->chase_timer); + ppd->cpspec->chase_timer.expires = 0; + } + break; + + default: + ret = -EINVAL; + qib_dev_err(dd, "bad linkinitcmd req 0x%x\n", + val & 0xffff); + goto bail; + } + qib_set_ib_7220_lstate(ppd, lcmd, licmd); + + maskr = IBA7220_IBC_WIDTH_MASK; + lsb = IBA7220_IBC_WIDTH_SHIFT; + tmp = (ppd->cpspec->ibcddrctrl >> lsb) & maskr; + /* If the width active on the chip does not match the + * width in the shadow register, write the new active + * width to the chip. + * We don't have to worry about speed as the speed is taken + * care of by set_7220_ibspeed_fast called by ib_updown. + */ + if (ppd->link_width_enabled-1 != tmp) { + ppd->cpspec->ibcddrctrl &= ~(maskr << lsb); + ppd->cpspec->ibcddrctrl |= + (((u64)(ppd->link_width_enabled-1) & maskr) << + lsb); + qib_write_kreg(dd, kr_ibcddrctrl, + ppd->cpspec->ibcddrctrl); + qib_write_kreg(dd, kr_scratch, 0); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_FORCE_NOTIFY; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } + goto bail; + + case QIB_IB_CFG_HRTBT: /* set Heartbeat off/enable/auto */ + if (val > IBA7220_IBC_HRTBT_MASK) { + ret = -EINVAL; + goto bail; + } + lsb = IBA7220_IBC_HRTBT_SHIFT; + maskr = IBA7220_IBC_HRTBT_MASK; + break; + + default: + ret = -EINVAL; + goto bail; + } + ppd->cpspec->ibcddrctrl &= ~(maskr << lsb); + ppd->cpspec->ibcddrctrl |= (((u64) val & maskr) << lsb); + qib_write_kreg(dd, kr_ibcddrctrl, ppd->cpspec->ibcddrctrl); + qib_write_kreg(dd, kr_scratch, 0); + if (setforce) { + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_FORCE_NOTIFY; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } +bail: + return ret; +} + +static int qib_7220_set_loopback(struct qib_pportdata *ppd, const char *what) +{ + int ret = 0; + u64 val, ddr; + + if (!strncmp(what, "ibc", 3)) { + ppd->cpspec->ibcctrl |= SYM_MASK(IBCCtrl, Loopback); + val = 0; /* disable heart beat, so link will come up */ + qib_devinfo(ppd->dd->pcidev, "Enabling IB%u:%u IBC loopback\n", + ppd->dd->unit, ppd->port); + } else if (!strncmp(what, "off", 3)) { + ppd->cpspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback); + /* enable heart beat again */ + val = IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT; + qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback " + "(normal)\n", ppd->dd->unit, ppd->port); + } else + ret = -EINVAL; + if (!ret) { + qib_write_kreg(ppd->dd, kr_ibcctrl, ppd->cpspec->ibcctrl); + ddr = ppd->cpspec->ibcddrctrl & ~(IBA7220_IBC_HRTBT_MASK + << IBA7220_IBC_HRTBT_SHIFT); + ppd->cpspec->ibcddrctrl = ddr | val; + qib_write_kreg(ppd->dd, kr_ibcddrctrl, + ppd->cpspec->ibcddrctrl); + qib_write_kreg(ppd->dd, kr_scratch, 0); + } + return ret; +} + +static void qib_update_7220_usrhead(struct qib_ctxtdata *rcd, u64 hd, + u32 updegr, u32 egrhd, u32 npkts) +{ + if (updegr) + qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt); + mmiowb(); + qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); + mmiowb(); +} + +static u32 qib_7220_hdrqempty(struct qib_ctxtdata *rcd) +{ + u32 head, tail; + + head = qib_read_ureg32(rcd->dd, ur_rcvhdrhead, rcd->ctxt); + if (rcd->rcvhdrtail_kvaddr) + tail = qib_get_rcvhdrtail(rcd); + else + tail = qib_read_ureg32(rcd->dd, ur_rcvhdrtail, rcd->ctxt); + return head == tail; +} + +/* + * Modify the RCVCTRL register in chip-specific way. This + * is a function because bit positions and (future) register + * location is chip-specifc, but the needed operations are + * generic. is a bit-mask because we often want to + * do multiple modifications. + */ +static void rcvctrl_7220_mod(struct qib_pportdata *ppd, unsigned int op, + int ctxt) +{ + struct qib_devdata *dd = ppd->dd; + u64 mask, val; + unsigned long flags; + + spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags); + if (op & QIB_RCVCTRL_TAILUPD_ENB) + dd->rcvctrl |= (1ULL << IBA7220_R_TAILUPD_SHIFT); + if (op & QIB_RCVCTRL_TAILUPD_DIS) + dd->rcvctrl &= ~(1ULL << IBA7220_R_TAILUPD_SHIFT); + if (op & QIB_RCVCTRL_PKEY_ENB) + dd->rcvctrl &= ~(1ULL << IBA7220_R_PKEY_DIS_SHIFT); + if (op & QIB_RCVCTRL_PKEY_DIS) + dd->rcvctrl |= (1ULL << IBA7220_R_PKEY_DIS_SHIFT); + if (ctxt < 0) + mask = (1ULL << dd->ctxtcnt) - 1; + else + mask = (1ULL << ctxt); + if (op & QIB_RCVCTRL_CTXT_ENB) { + /* always done for specific ctxt */ + dd->rcvctrl |= (mask << SYM_LSB(RcvCtrl, PortEnable)); + if (!(dd->flags & QIB_NODMA_RTAIL)) + dd->rcvctrl |= 1ULL << IBA7220_R_TAILUPD_SHIFT; + /* Write these registers before the context is enabled. */ + qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr, ctxt, + dd->rcd[ctxt]->rcvhdrqtailaddr_phys); + qib_write_kreg_ctxt(dd, kr_rcvhdraddr, ctxt, + dd->rcd[ctxt]->rcvhdrq_phys); + dd->rcd[ctxt]->seq_cnt = 1; + } + if (op & QIB_RCVCTRL_CTXT_DIS) + dd->rcvctrl &= ~(mask << SYM_LSB(RcvCtrl, PortEnable)); + if (op & QIB_RCVCTRL_INTRAVAIL_ENB) + dd->rcvctrl |= (mask << IBA7220_R_INTRAVAIL_SHIFT); + if (op & QIB_RCVCTRL_INTRAVAIL_DIS) + dd->rcvctrl &= ~(mask << IBA7220_R_INTRAVAIL_SHIFT); + qib_write_kreg(dd, kr_rcvctrl, dd->rcvctrl); + if ((op & QIB_RCVCTRL_INTRAVAIL_ENB) && dd->rhdrhead_intr_off) { + /* arm rcv interrupt */ + val = qib_read_ureg32(dd, ur_rcvhdrhead, ctxt) | + dd->rhdrhead_intr_off; + qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt); + } + if (op & QIB_RCVCTRL_CTXT_ENB) { + /* + * Init the context registers also; if we were + * disabled, tail and head should both be zero + * already from the enable, but since we don't + * know, we have to do it explicitly. + */ + val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt); + qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt); + + val = qib_read_ureg32(dd, ur_rcvhdrtail, ctxt); + dd->rcd[ctxt]->head = val; + /* If kctxt, interrupt on next receive. */ + if (ctxt < dd->first_user_ctxt) + val |= dd->rhdrhead_intr_off; + qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt); + } + if (op & QIB_RCVCTRL_CTXT_DIS) { + if (ctxt >= 0) { + qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr, ctxt, 0); + qib_write_kreg_ctxt(dd, kr_rcvhdraddr, ctxt, 0); + } else { + unsigned i; + + for (i = 0; i < dd->cfgctxts; i++) { + qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr, + i, 0); + qib_write_kreg_ctxt(dd, kr_rcvhdraddr, i, 0); + } + } + } + spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags); +} + +/* + * Modify the SENDCTRL register in chip-specific way. This + * is a function there may be multiple such registers with + * slightly different layouts. To start, we assume the + * "canonical" register layout of the first chips. + * Chip requires no back-back sendctrl writes, so write + * scratch register after writing sendctrl + */ +static void sendctrl_7220_mod(struct qib_pportdata *ppd, u32 op) +{ + struct qib_devdata *dd = ppd->dd; + u64 tmp_dd_sendctrl; + unsigned long flags; + + spin_lock_irqsave(&dd->sendctrl_lock, flags); + + /* First the ones that are "sticky", saved in shadow */ + if (op & QIB_SENDCTRL_CLEAR) + dd->sendctrl = 0; + if (op & QIB_SENDCTRL_SEND_DIS) + dd->sendctrl &= ~SYM_MASK(SendCtrl, SPioEnable); + else if (op & QIB_SENDCTRL_SEND_ENB) { + dd->sendctrl |= SYM_MASK(SendCtrl, SPioEnable); + if (dd->flags & QIB_USE_SPCL_TRIG) + dd->sendctrl |= SYM_MASK(SendCtrl, + SSpecialTriggerEn); + } + if (op & QIB_SENDCTRL_AVAIL_DIS) + dd->sendctrl &= ~SYM_MASK(SendCtrl, SendBufAvailUpd); + else if (op & QIB_SENDCTRL_AVAIL_ENB) + dd->sendctrl |= SYM_MASK(SendCtrl, SendBufAvailUpd); + + if (op & QIB_SENDCTRL_DISARM_ALL) { + u32 i, last; + + tmp_dd_sendctrl = dd->sendctrl; + /* + * disarm any that are not yet launched, disabling sends + * and updates until done. + */ + last = dd->piobcnt2k + dd->piobcnt4k; + tmp_dd_sendctrl &= + ~(SYM_MASK(SendCtrl, SPioEnable) | + SYM_MASK(SendCtrl, SendBufAvailUpd)); + for (i = 0; i < last; i++) { + qib_write_kreg(dd, kr_sendctrl, + tmp_dd_sendctrl | + SYM_MASK(SendCtrl, Disarm) | i); + qib_write_kreg(dd, kr_scratch, 0); + } + } + + tmp_dd_sendctrl = dd->sendctrl; + + if (op & QIB_SENDCTRL_FLUSH) + tmp_dd_sendctrl |= SYM_MASK(SendCtrl, Abort); + if (op & QIB_SENDCTRL_DISARM) + tmp_dd_sendctrl |= SYM_MASK(SendCtrl, Disarm) | + ((op & QIB_7220_SendCtrl_DisarmPIOBuf_RMASK) << + SYM_LSB(SendCtrl, DisarmPIOBuf)); + if ((op & QIB_SENDCTRL_AVAIL_BLIP) && + (dd->sendctrl & SYM_MASK(SendCtrl, SendBufAvailUpd))) + tmp_dd_sendctrl &= ~SYM_MASK(SendCtrl, SendBufAvailUpd); + + qib_write_kreg(dd, kr_sendctrl, tmp_dd_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + + if (op & QIB_SENDCTRL_AVAIL_BLIP) { + qib_write_kreg(dd, kr_sendctrl, dd->sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + + if (op & QIB_SENDCTRL_FLUSH) { + u32 v; + /* + * ensure writes have hit chip, then do a few + * more reads, to allow DMA of pioavail registers + * to occur, so in-memory copy is in sync with + * the chip. Not always safe to sleep. + */ + v = qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_scratch, v); + v = qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_scratch, v); + qib_read_kreg32(dd, kr_scratch); + } +} + +/** + * qib_portcntr_7220 - read a per-port counter + * @dd: the qlogic_ib device + * @creg: the counter to snapshot + */ +static u64 qib_portcntr_7220(struct qib_pportdata *ppd, u32 reg) +{ + u64 ret = 0ULL; + struct qib_devdata *dd = ppd->dd; + u16 creg; + /* 0xffff for unimplemented or synthesized counters */ + static const u16 xlator[] = { + [QIBPORTCNTR_PKTSEND] = cr_pktsend, + [QIBPORTCNTR_WORDSEND] = cr_wordsend, + [QIBPORTCNTR_PSXMITDATA] = cr_psxmitdatacount, + [QIBPORTCNTR_PSXMITPKTS] = cr_psxmitpktscount, + [QIBPORTCNTR_PSXMITWAIT] = cr_psxmitwaitcount, + [QIBPORTCNTR_SENDSTALL] = cr_sendstall, + [QIBPORTCNTR_PKTRCV] = cr_pktrcv, + [QIBPORTCNTR_PSRCVDATA] = cr_psrcvdatacount, + [QIBPORTCNTR_PSRCVPKTS] = cr_psrcvpktscount, + [QIBPORTCNTR_RCVEBP] = cr_rcvebp, + [QIBPORTCNTR_RCVOVFL] = cr_rcvovfl, + [QIBPORTCNTR_WORDRCV] = cr_wordrcv, + [QIBPORTCNTR_RXDROPPKT] = cr_rxdroppkt, + [QIBPORTCNTR_RXLOCALPHYERR] = cr_rxotherlocalphyerr, + [QIBPORTCNTR_RXVLERR] = cr_rxvlerr, + [QIBPORTCNTR_ERRICRC] = cr_erricrc, + [QIBPORTCNTR_ERRVCRC] = cr_errvcrc, + [QIBPORTCNTR_ERRLPCRC] = cr_errlpcrc, + [QIBPORTCNTR_BADFORMAT] = cr_badformat, + [QIBPORTCNTR_ERR_RLEN] = cr_err_rlen, + [QIBPORTCNTR_IBSYMBOLERR] = cr_ibsymbolerr, + [QIBPORTCNTR_INVALIDRLEN] = cr_invalidrlen, + [QIBPORTCNTR_UNSUPVL] = cr_txunsupvl, + [QIBPORTCNTR_EXCESSBUFOVFL] = cr_excessbufferovfl, + [QIBPORTCNTR_ERRLINK] = cr_errlink, + [QIBPORTCNTR_IBLINKDOWN] = cr_iblinkdown, + [QIBPORTCNTR_IBLINKERRRECOV] = cr_iblinkerrrecov, + [QIBPORTCNTR_LLI] = cr_locallinkintegrityerr, + [QIBPORTCNTR_PSINTERVAL] = cr_psinterval, + [QIBPORTCNTR_PSSTART] = cr_psstart, + [QIBPORTCNTR_PSSTAT] = cr_psstat, + [QIBPORTCNTR_VL15PKTDROP] = cr_vl15droppedpkt, + [QIBPORTCNTR_ERRPKEY] = cr_errpkey, + [QIBPORTCNTR_KHDROVFL] = 0xffff, + }; + + if (reg >= ARRAY_SIZE(xlator)) { + qib_devinfo(ppd->dd->pcidev, + "Unimplemented portcounter %u\n", reg); + goto done; + } + creg = xlator[reg]; + + if (reg == QIBPORTCNTR_KHDROVFL) { + int i; + + /* sum over all kernel contexts */ + for (i = 0; i < dd->first_user_ctxt; i++) + ret += read_7220_creg32(dd, cr_portovfl + i); + } + if (creg == 0xffff) + goto done; + + /* + * only fast incrementing counters are 64bit; use 32 bit reads to + * avoid two independent reads when on opteron + */ + if ((creg == cr_wordsend || creg == cr_wordrcv || + creg == cr_pktsend || creg == cr_pktrcv)) + ret = read_7220_creg(dd, creg); + else + ret = read_7220_creg32(dd, creg); + if (creg == cr_ibsymbolerr) { + if (dd->pport->cpspec->ibdeltainprog) + ret -= ret - ppd->cpspec->ibsymsnap; + ret -= dd->pport->cpspec->ibsymdelta; + } else if (creg == cr_iblinkerrrecov) { + if (dd->pport->cpspec->ibdeltainprog) + ret -= ret - ppd->cpspec->iblnkerrsnap; + ret -= dd->pport->cpspec->iblnkerrdelta; + } +done: + return ret; +} + +/* + * Device counter names (not port-specific), one line per stat, + * single string. Used by utilities like ipathstats to print the stats + * in a way which works for different versions of drivers, without changing + * the utility. Names need to be 12 chars or less (w/o newline), for proper + * display by utility. + * Non-error counters are first. + * Start of "error" conters is indicated by a leading "E " on the first + * "error" counter, and doesn't count in label length. + * The EgrOvfl list needs to be last so we truncate them at the configured + * context count for the device. + * cntr7220indices contains the corresponding register indices. + */ +static const char cntr7220names[] = + "Interrupts\n" + "HostBusStall\n" + "E RxTIDFull\n" + "RxTIDInvalid\n" + "Ctxt0EgrOvfl\n" + "Ctxt1EgrOvfl\n" + "Ctxt2EgrOvfl\n" + "Ctxt3EgrOvfl\n" + "Ctxt4EgrOvfl\n" + "Ctxt5EgrOvfl\n" + "Ctxt6EgrOvfl\n" + "Ctxt7EgrOvfl\n" + "Ctxt8EgrOvfl\n" + "Ctxt9EgrOvfl\n" + "Ctx10EgrOvfl\n" + "Ctx11EgrOvfl\n" + "Ctx12EgrOvfl\n" + "Ctx13EgrOvfl\n" + "Ctx14EgrOvfl\n" + "Ctx15EgrOvfl\n" + "Ctx16EgrOvfl\n"; + +static const size_t cntr7220indices[] = { + cr_lbint, + cr_lbflowstall, + cr_errtidfull, + cr_errtidvalid, + cr_portovfl + 0, + cr_portovfl + 1, + cr_portovfl + 2, + cr_portovfl + 3, + cr_portovfl + 4, + cr_portovfl + 5, + cr_portovfl + 6, + cr_portovfl + 7, + cr_portovfl + 8, + cr_portovfl + 9, + cr_portovfl + 10, + cr_portovfl + 11, + cr_portovfl + 12, + cr_portovfl + 13, + cr_portovfl + 14, + cr_portovfl + 15, + cr_portovfl + 16, +}; + +/* + * same as cntr7220names and cntr7220indices, but for port-specific counters. + * portcntr7220indices is somewhat complicated by some registers needing + * adjustments of various kinds, and those are ORed with _PORT_VIRT_FLAG + */ +static const char portcntr7220names[] = + "TxPkt\n" + "TxFlowPkt\n" + "TxWords\n" + "RxPkt\n" + "RxFlowPkt\n" + "RxWords\n" + "TxFlowStall\n" + "TxDmaDesc\n" /* 7220 and 7322-only */ + "E RxDlidFltr\n" /* 7220 and 7322-only */ + "IBStatusChng\n" + "IBLinkDown\n" + "IBLnkRecov\n" + "IBRxLinkErr\n" + "IBSymbolErr\n" + "RxLLIErr\n" + "RxBadFormat\n" + "RxBadLen\n" + "RxBufOvrfl\n" + "RxEBP\n" + "RxFlowCtlErr\n" + "RxICRCerr\n" + "RxLPCRCerr\n" + "RxVCRCerr\n" + "RxInvalLen\n" + "RxInvalPKey\n" + "RxPktDropped\n" + "TxBadLength\n" + "TxDropped\n" + "TxInvalLen\n" + "TxUnderrun\n" + "TxUnsupVL\n" + "RxLclPhyErr\n" /* 7220 and 7322-only */ + "RxVL15Drop\n" /* 7220 and 7322-only */ + "RxVlErr\n" /* 7220 and 7322-only */ + "XcessBufOvfl\n" /* 7220 and 7322-only */ + ; + +#define _PORT_VIRT_FLAG 0x8000 /* "virtual", need adjustments */ +static const size_t portcntr7220indices[] = { + QIBPORTCNTR_PKTSEND | _PORT_VIRT_FLAG, + cr_pktsendflow, + QIBPORTCNTR_WORDSEND | _PORT_VIRT_FLAG, + QIBPORTCNTR_PKTRCV | _PORT_VIRT_FLAG, + cr_pktrcvflowctrl, + QIBPORTCNTR_WORDRCV | _PORT_VIRT_FLAG, + QIBPORTCNTR_SENDSTALL | _PORT_VIRT_FLAG, + cr_txsdmadesc, + cr_rxdlidfltr, + cr_ibstatuschange, + QIBPORTCNTR_IBLINKDOWN | _PORT_VIRT_FLAG, + QIBPORTCNTR_IBLINKERRRECOV | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRLINK | _PORT_VIRT_FLAG, + QIBPORTCNTR_IBSYMBOLERR | _PORT_VIRT_FLAG, + QIBPORTCNTR_LLI | _PORT_VIRT_FLAG, + QIBPORTCNTR_BADFORMAT | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERR_RLEN | _PORT_VIRT_FLAG, + QIBPORTCNTR_RCVOVFL | _PORT_VIRT_FLAG, + QIBPORTCNTR_RCVEBP | _PORT_VIRT_FLAG, + cr_rcvflowctrl_err, + QIBPORTCNTR_ERRICRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRLPCRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRVCRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_INVALIDRLEN | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRPKEY | _PORT_VIRT_FLAG, + QIBPORTCNTR_RXDROPPKT | _PORT_VIRT_FLAG, + cr_invalidslen, + cr_senddropped, + cr_errslen, + cr_sendunderrun, + cr_txunsupvl, + QIBPORTCNTR_RXLOCALPHYERR | _PORT_VIRT_FLAG, + QIBPORTCNTR_VL15PKTDROP | _PORT_VIRT_FLAG, + QIBPORTCNTR_RXVLERR | _PORT_VIRT_FLAG, + QIBPORTCNTR_EXCESSBUFOVFL | _PORT_VIRT_FLAG, +}; + +/* do all the setup to make the counter reads efficient later */ +static void init_7220_cntrnames(struct qib_devdata *dd) +{ + int i, j = 0; + char *s; + + for (i = 0, s = (char *)cntr7220names; s && j <= dd->cfgctxts; + i++) { + /* we always have at least one counter before the egrovfl */ + if (!j && !strncmp("Ctxt0EgrOvfl", s + 1, 12)) + j = 1; + s = strchr(s + 1, '\n'); + if (s && j) + j++; + } + dd->cspec->ncntrs = i; + if (!s) + /* full list; size is without terminating null */ + dd->cspec->cntrnamelen = sizeof(cntr7220names) - 1; + else + dd->cspec->cntrnamelen = 1 + s - cntr7220names; + dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs + * sizeof(u64), GFP_KERNEL); + if (!dd->cspec->cntrs) + qib_dev_err(dd, "Failed allocation for counters\n"); + + for (i = 0, s = (char *)portcntr7220names; s; i++) + s = strchr(s + 1, '\n'); + dd->cspec->nportcntrs = i - 1; + dd->cspec->portcntrnamelen = sizeof(portcntr7220names) - 1; + dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs + * sizeof(u64), GFP_KERNEL); + if (!dd->cspec->portcntrs) + qib_dev_err(dd, "Failed allocation for portcounters\n"); +} + +static u32 qib_read_7220cntrs(struct qib_devdata *dd, loff_t pos, char **namep, + u64 **cntrp) +{ + u32 ret; + + if (!dd->cspec->cntrs) { + ret = 0; + goto done; + } + + if (namep) { + *namep = (char *)cntr7220names; + ret = dd->cspec->cntrnamelen; + if (pos >= ret) + ret = 0; /* final read after getting everything */ + } else { + u64 *cntr = dd->cspec->cntrs; + int i; + + ret = dd->cspec->ncntrs * sizeof(u64); + if (!cntr || pos >= ret) { + /* everything read, or couldn't get memory */ + ret = 0; + goto done; + } + + *cntrp = cntr; + for (i = 0; i < dd->cspec->ncntrs; i++) + *cntr++ = read_7220_creg32(dd, cntr7220indices[i]); + } +done: + return ret; +} + +static u32 qib_read_7220portcntrs(struct qib_devdata *dd, loff_t pos, u32 port, + char **namep, u64 **cntrp) +{ + u32 ret; + + if (!dd->cspec->portcntrs) { + ret = 0; + goto done; + } + if (namep) { + *namep = (char *)portcntr7220names; + ret = dd->cspec->portcntrnamelen; + if (pos >= ret) + ret = 0; /* final read after getting everything */ + } else { + u64 *cntr = dd->cspec->portcntrs; + struct qib_pportdata *ppd = &dd->pport[port]; + int i; + + ret = dd->cspec->nportcntrs * sizeof(u64); + if (!cntr || pos >= ret) { + /* everything read, or couldn't get memory */ + ret = 0; + goto done; + } + *cntrp = cntr; + for (i = 0; i < dd->cspec->nportcntrs; i++) { + if (portcntr7220indices[i] & _PORT_VIRT_FLAG) + *cntr++ = qib_portcntr_7220(ppd, + portcntr7220indices[i] & + ~_PORT_VIRT_FLAG); + else + *cntr++ = read_7220_creg32(dd, + portcntr7220indices[i]); + } + } +done: + return ret; +} + +/** + * qib_get_7220_faststats - get word counters from chip before they overflow + * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * + * This needs more work; in particular, decision on whether we really + * need traffic_wds done the way it is + * called from add_timer + */ +static void qib_get_7220_faststats(unsigned long opaque) +{ + struct qib_devdata *dd = (struct qib_devdata *) opaque; + struct qib_pportdata *ppd = dd->pport; + unsigned long flags; + u64 traffic_wds; + + /* + * don't access the chip while running diags, or memory diags can + * fail + */ + if (!(dd->flags & QIB_INITTED) || dd->diag_client) + /* but re-arm the timer, for diags case; won't hurt other */ + goto done; + + /* + * We now try to maintain an activity timer, based on traffic + * exceeding a threshold, so we need to check the word-counts + * even if they are 64-bit. + */ + traffic_wds = qib_portcntr_7220(ppd, cr_wordsend) + + qib_portcntr_7220(ppd, cr_wordrcv); + spin_lock_irqsave(&dd->eep_st_lock, flags); + traffic_wds -= dd->traffic_wds; + dd->traffic_wds += traffic_wds; + if (traffic_wds >= QIB_TRAFFIC_ACTIVE_THRESHOLD) + atomic_add(5, &dd->active_time); /* S/B #define */ + spin_unlock_irqrestore(&dd->eep_st_lock, flags); +done: + mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER); +} + +/* + * If we are using MSI, try to fallback to INTx. + */ +static int qib_7220_intr_fallback(struct qib_devdata *dd) +{ + if (!dd->msi_lo) + return 0; + + qib_devinfo(dd->pcidev, "MSI interrupt not detected," + " trying INTx interrupts\n"); + qib_7220_free_irq(dd); + qib_enable_intx(dd->pcidev); + /* + * Some newer kernels require free_irq before disable_msi, + * and irq can be changed during disable and INTx enable + * and we need to therefore use the pcidev->irq value, + * not our saved MSI value. + */ + dd->cspec->irq = dd->pcidev->irq; + qib_setup_7220_interrupt(dd); + return 1; +} + +/* + * Reset the XGXS (between serdes and IBC). Slightly less intrusive + * than resetting the IBC or external link state, and useful in some + * cases to cause some retraining. To do this right, we reset IBC + * as well. + */ +static void qib_7220_xgxs_reset(struct qib_pportdata *ppd) +{ + u64 val, prev_val; + struct qib_devdata *dd = ppd->dd; + + prev_val = qib_read_kreg64(dd, kr_xgxs_cfg); + val = prev_val | QLOGIC_IB_XGXS_RESET; + prev_val &= ~QLOGIC_IB_XGXS_RESET; /* be sure */ + qib_write_kreg(dd, kr_control, + dd->control & ~QLOGIC_IB_C_LINKENABLE); + qib_write_kreg(dd, kr_xgxs_cfg, val); + qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_xgxs_cfg, prev_val); + qib_write_kreg(dd, kr_control, dd->control); +} + +/* + * For this chip, we want to use the same buffer every time + * when we are trying to bring the link up (they are always VL15 + * packets). At that link state the packet should always go out immediately + * (or at least be discarded at the tx interface if the link is down). + * If it doesn't, and the buffer isn't available, that means some other + * sender has gotten ahead of us, and is preventing our packet from going + * out. In that case, we flush all packets, and try again. If that still + * fails, we fail the request, and hope things work the next time around. + * + * We don't need very complicated heuristics on whether the packet had + * time to go out or not, since even at SDR 1X, it goes out in very short + * time periods, covered by the chip reads done here and as part of the + * flush. + */ +static u32 __iomem *get_7220_link_buf(struct qib_pportdata *ppd, u32 *bnum) +{ + u32 __iomem *buf; + u32 lbuf = ppd->dd->cspec->lastbuf_for_pio; + int do_cleanup; + unsigned long flags; + + /* + * always blip to get avail list updated, since it's almost + * always needed, and is fairly cheap. + */ + sendctrl_7220_mod(ppd->dd->pport, QIB_SENDCTRL_AVAIL_BLIP); + qib_read_kreg64(ppd->dd, kr_scratch); /* extra chip flush */ + buf = qib_getsendbuf_range(ppd->dd, bnum, lbuf, lbuf); + if (buf) + goto done; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (ppd->sdma_state.current_state == qib_sdma_state_s20_idle && + ppd->sdma_state.current_state != qib_sdma_state_s00_hw_down) { + __qib_sdma_process_event(ppd, qib_sdma_event_e00_go_hw_down); + do_cleanup = 0; + } else { + do_cleanup = 1; + qib_7220_sdma_hw_clean_up(ppd); + } + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + + if (do_cleanup) { + qib_read_kreg64(ppd->dd, kr_scratch); /* extra chip flush */ + buf = qib_getsendbuf_range(ppd->dd, bnum, lbuf, lbuf); + } +done: + return buf; +} + +/* + * This code for non-IBTA-compliant IB speed negotiation is only known to + * work for the SDR to DDR transition, and only between an HCA and a switch + * with recent firmware. It is based on observed heuristics, rather than + * actual knowledge of the non-compliant speed negotiation. + * It has a number of hard-coded fields, since the hope is to rewrite this + * when a spec is available on how the negoation is intended to work. + */ +static void autoneg_7220_sendpkt(struct qib_pportdata *ppd, u32 *hdr, + u32 dcnt, u32 *data) +{ + int i; + u64 pbc; + u32 __iomem *piobuf; + u32 pnum; + struct qib_devdata *dd = ppd->dd; + + i = 0; + pbc = 7 + dcnt + 1; /* 7 dword header, dword data, icrc */ + pbc |= PBC_7220_VL15_SEND; + while (!(piobuf = get_7220_link_buf(ppd, &pnum))) { + if (i++ > 5) + return; + udelay(2); + } + sendctrl_7220_mod(dd->pport, QIB_SENDCTRL_DISARM_BUF(pnum)); + writeq(pbc, piobuf); + qib_flush_wc(); + qib_pio_copy(piobuf + 2, hdr, 7); + qib_pio_copy(piobuf + 9, data, dcnt); + if (dd->flags & QIB_USE_SPCL_TRIG) { + u32 spcl_off = (pnum >= dd->piobcnt2k) ? 2047 : 1023; + + qib_flush_wc(); + __raw_writel(0xaebecede, piobuf + spcl_off); + } + qib_flush_wc(); + qib_sendbuf_done(dd, pnum); +} + +/* + * _start packet gets sent twice at start, _done gets sent twice at end + */ +static void autoneg_7220_send(struct qib_pportdata *ppd, int which) +{ + struct qib_devdata *dd = ppd->dd; + static u32 swapped; + u32 dw, i, hcnt, dcnt, *data; + static u32 hdr[7] = { 0xf002ffff, 0x48ffff, 0x6400abba }; + static u32 madpayload_start[0x40] = { + 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0, + 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x1, 0x1388, 0x15e, 0x1, /* rest 0's */ + }; + static u32 madpayload_done[0x40] = { + 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0, + 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x40000001, 0x1388, 0x15e, /* rest 0's */ + }; + + dcnt = ARRAY_SIZE(madpayload_start); + hcnt = ARRAY_SIZE(hdr); + if (!swapped) { + /* for maintainability, do it at runtime */ + for (i = 0; i < hcnt; i++) { + dw = (__force u32) cpu_to_be32(hdr[i]); + hdr[i] = dw; + } + for (i = 0; i < dcnt; i++) { + dw = (__force u32) cpu_to_be32(madpayload_start[i]); + madpayload_start[i] = dw; + dw = (__force u32) cpu_to_be32(madpayload_done[i]); + madpayload_done[i] = dw; + } + swapped = 1; + } + + data = which ? madpayload_done : madpayload_start; + + autoneg_7220_sendpkt(ppd, hdr, dcnt, data); + qib_read_kreg64(dd, kr_scratch); + udelay(2); + autoneg_7220_sendpkt(ppd, hdr, dcnt, data); + qib_read_kreg64(dd, kr_scratch); + udelay(2); +} + +/* + * Do the absolute minimum to cause an IB speed change, and make it + * ready, but don't actually trigger the change. The caller will + * do that when ready (if link is in Polling training state, it will + * happen immediately, otherwise when link next goes down) + * + * This routine should only be used as part of the DDR autonegotation + * code for devices that are not compliant with IB 1.2 (or code that + * fixes things up for same). + * + * When link has gone down, and autoneg enabled, or autoneg has + * failed and we give up until next time we set both speeds, and + * then we want IBTA enabled as well as "use max enabled speed. + */ +static void set_7220_ibspeed_fast(struct qib_pportdata *ppd, u32 speed) +{ + ppd->cpspec->ibcddrctrl &= ~(IBA7220_IBC_SPEED_AUTONEG_MASK | + IBA7220_IBC_IBTA_1_2_MASK); + + if (speed == (QIB_IB_SDR | QIB_IB_DDR)) + ppd->cpspec->ibcddrctrl |= IBA7220_IBC_SPEED_AUTONEG_MASK | + IBA7220_IBC_IBTA_1_2_MASK; + else + ppd->cpspec->ibcddrctrl |= speed == QIB_IB_DDR ? + IBA7220_IBC_SPEED_DDR : IBA7220_IBC_SPEED_SDR; + + qib_write_kreg(ppd->dd, kr_ibcddrctrl, ppd->cpspec->ibcddrctrl); + qib_write_kreg(ppd->dd, kr_scratch, 0); +} + +/* + * This routine is only used when we are not talking to another + * IB 1.2-compliant device that we think can do DDR. + * (This includes all existing switch chips as of Oct 2007.) + * 1.2-compliant devices go directly to DDR prior to reaching INIT + */ +static void try_7220_autoneg(struct qib_pportdata *ppd) +{ + unsigned long flags; + + /* + * Required for older non-IB1.2 DDR switches. Newer + * non-IB-compliant switches don't need it, but so far, + * aren't bothered by it either. "Magic constant" + */ + qib_write_kreg(ppd->dd, kr_ncmodectrl, 0x3b9dc07); + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_AUTONEG_INPROG; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + autoneg_7220_send(ppd, 0); + set_7220_ibspeed_fast(ppd, QIB_IB_DDR); + + toggle_7220_rclkrls(ppd->dd); + /* 2 msec is minimum length of a poll cycle */ + queue_delayed_work(ib_wq, &ppd->cpspec->autoneg_work, + msecs_to_jiffies(2)); +} + +/* + * Handle the empirically determined mechanism for auto-negotiation + * of DDR speed with switches. + */ +static void autoneg_7220_work(struct work_struct *work) +{ + struct qib_pportdata *ppd; + struct qib_devdata *dd; + u64 startms; + u32 i; + unsigned long flags; + + ppd = &container_of(work, struct qib_chippport_specific, + autoneg_work.work)->pportdata; + dd = ppd->dd; + + startms = jiffies_to_msecs(jiffies); + + /* + * Busy wait for this first part, it should be at most a + * few hundred usec, since we scheduled ourselves for 2msec. + */ + for (i = 0; i < 25; i++) { + if (SYM_FIELD(ppd->lastibcstat, IBCStatus, LinkTrainingState) + == IB_7220_LT_STATE_POLLQUIET) { + qib_set_linkstate(ppd, QIB_IB_LINKDOWN_DISABLE); + break; + } + udelay(100); + } + + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) + goto done; /* we got there early or told to stop */ + + /* we expect this to timeout */ + if (wait_event_timeout(ppd->cpspec->autoneg_wait, + !(ppd->lflags & QIBL_IB_AUTONEG_INPROG), + msecs_to_jiffies(90))) + goto done; + + toggle_7220_rclkrls(dd); + + /* we expect this to timeout */ + if (wait_event_timeout(ppd->cpspec->autoneg_wait, + !(ppd->lflags & QIBL_IB_AUTONEG_INPROG), + msecs_to_jiffies(1700))) + goto done; + + set_7220_ibspeed_fast(ppd, QIB_IB_SDR); + toggle_7220_rclkrls(dd); + + /* + * Wait up to 250 msec for link to train and get to INIT at DDR; + * this should terminate early. + */ + wait_event_timeout(ppd->cpspec->autoneg_wait, + !(ppd->lflags & QIBL_IB_AUTONEG_INPROG), + msecs_to_jiffies(250)); +done: + if (ppd->lflags & QIBL_IB_AUTONEG_INPROG) { + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG; + if (dd->cspec->autoneg_tries == AUTONEG_TRIES) { + ppd->lflags |= QIBL_IB_AUTONEG_FAILED; + dd->cspec->autoneg_tries = 0; + } + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + set_7220_ibspeed_fast(ppd, ppd->link_speed_enabled); + } +} + +static u32 qib_7220_iblink_state(u64 ibcs) +{ + u32 state = (u32)SYM_FIELD(ibcs, IBCStatus, LinkState); + + switch (state) { + case IB_7220_L_STATE_INIT: + state = IB_PORT_INIT; + break; + case IB_7220_L_STATE_ARM: + state = IB_PORT_ARMED; + break; + case IB_7220_L_STATE_ACTIVE: + /* fall through */ + case IB_7220_L_STATE_ACT_DEFER: + state = IB_PORT_ACTIVE; + break; + default: /* fall through */ + case IB_7220_L_STATE_DOWN: + state = IB_PORT_DOWN; + break; + } + return state; +} + +/* returns the IBTA port state, rather than the IBC link training state */ +static u8 qib_7220_phys_portstate(u64 ibcs) +{ + u8 state = (u8)SYM_FIELD(ibcs, IBCStatus, LinkTrainingState); + return qib_7220_physportstate[state]; +} + +static int qib_7220_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs) +{ + int ret = 0, symadj = 0; + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_FORCE_NOTIFY; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + + if (!ibup) { + /* + * When the link goes down we don't want AEQ running, so it + * won't interfere with IBC training, etc., and we need + * to go back to the static SerDes preset values. + */ + if (!(ppd->lflags & (QIBL_IB_AUTONEG_FAILED | + QIBL_IB_AUTONEG_INPROG))) + set_7220_ibspeed_fast(ppd, ppd->link_speed_enabled); + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) { + qib_sd7220_presets(dd); + qib_cancel_sends(ppd); /* initial disarm, etc. */ + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (__qib_sdma_running(ppd)) + __qib_sdma_process_event(ppd, + qib_sdma_event_e70_go_idle); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } + /* this might better in qib_sd7220_presets() */ + set_7220_relock_poll(dd, ibup); + } else { + if (qib_compat_ddr_negotiate && + !(ppd->lflags & (QIBL_IB_AUTONEG_FAILED | + QIBL_IB_AUTONEG_INPROG)) && + ppd->link_speed_active == QIB_IB_SDR && + (ppd->link_speed_enabled & (QIB_IB_DDR | QIB_IB_SDR)) == + (QIB_IB_DDR | QIB_IB_SDR) && + dd->cspec->autoneg_tries < AUTONEG_TRIES) { + /* we are SDR, and DDR auto-negotiation enabled */ + ++dd->cspec->autoneg_tries; + if (!ppd->cpspec->ibdeltainprog) { + ppd->cpspec->ibdeltainprog = 1; + ppd->cpspec->ibsymsnap = read_7220_creg32(dd, + cr_ibsymbolerr); + ppd->cpspec->iblnkerrsnap = read_7220_creg32(dd, + cr_iblinkerrrecov); + } + try_7220_autoneg(ppd); + ret = 1; /* no other IB status change processing */ + } else if ((ppd->lflags & QIBL_IB_AUTONEG_INPROG) && + ppd->link_speed_active == QIB_IB_SDR) { + autoneg_7220_send(ppd, 1); + set_7220_ibspeed_fast(ppd, QIB_IB_DDR); + udelay(2); + toggle_7220_rclkrls(dd); + ret = 1; /* no other IB status change processing */ + } else { + if ((ppd->lflags & QIBL_IB_AUTONEG_INPROG) && + (ppd->link_speed_active & QIB_IB_DDR)) { + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~(QIBL_IB_AUTONEG_INPROG | + QIBL_IB_AUTONEG_FAILED); + spin_unlock_irqrestore(&ppd->lflags_lock, + flags); + dd->cspec->autoneg_tries = 0; + /* re-enable SDR, for next link down */ + set_7220_ibspeed_fast(ppd, + ppd->link_speed_enabled); + wake_up(&ppd->cpspec->autoneg_wait); + symadj = 1; + } else if (ppd->lflags & QIBL_IB_AUTONEG_FAILED) { + /* + * Clear autoneg failure flag, and do setup + * so we'll try next time link goes down and + * back to INIT (possibly connected to a + * different device). + */ + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_AUTONEG_FAILED; + spin_unlock_irqrestore(&ppd->lflags_lock, + flags); + ppd->cpspec->ibcddrctrl |= + IBA7220_IBC_IBTA_1_2_MASK; + qib_write_kreg(dd, kr_ncmodectrl, 0); + symadj = 1; + } + } + + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) + symadj = 1; + + if (!ret) { + ppd->delay_mult = rate_to_delay + [(ibcs >> IBA7220_LINKSPEED_SHIFT) & 1] + [(ibcs >> IBA7220_LINKWIDTH_SHIFT) & 1]; + + set_7220_relock_poll(dd, ibup); + spin_lock_irqsave(&ppd->sdma_lock, flags); + /* + * Unlike 7322, the 7220 needs this, due to lack of + * interrupt in some cases when we have sdma active + * when the link goes down. + */ + if (ppd->sdma_state.current_state != + qib_sdma_state_s20_idle) + __qib_sdma_process_event(ppd, + qib_sdma_event_e00_go_hw_down); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } + } + + if (symadj) { + if (ppd->cpspec->ibdeltainprog) { + ppd->cpspec->ibdeltainprog = 0; + ppd->cpspec->ibsymdelta += read_7220_creg32(ppd->dd, + cr_ibsymbolerr) - ppd->cpspec->ibsymsnap; + ppd->cpspec->iblnkerrdelta += read_7220_creg32(ppd->dd, + cr_iblinkerrrecov) - ppd->cpspec->iblnkerrsnap; + } + } else if (!ibup && qib_compat_ddr_negotiate && + !ppd->cpspec->ibdeltainprog && + !(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) { + ppd->cpspec->ibdeltainprog = 1; + ppd->cpspec->ibsymsnap = read_7220_creg32(ppd->dd, + cr_ibsymbolerr); + ppd->cpspec->iblnkerrsnap = read_7220_creg32(ppd->dd, + cr_iblinkerrrecov); + } + + if (!ret) + qib_setup_7220_setextled(ppd, ibup); + return ret; +} + +/* + * Does read/modify/write to appropriate registers to + * set output and direction bits selected by mask. + * these are in their canonical postions (e.g. lsb of + * dir will end up in D48 of extctrl on existing chips). + * returns contents of GP Inputs. + */ +static int gpio_7220_mod(struct qib_devdata *dd, u32 out, u32 dir, u32 mask) +{ + u64 read_val, new_out; + unsigned long flags; + + if (mask) { + /* some bits being written, lock access to GPIO */ + dir &= mask; + out &= mask; + spin_lock_irqsave(&dd->cspec->gpio_lock, flags); + dd->cspec->extctrl &= ~((u64)mask << SYM_LSB(EXTCtrl, GPIOOe)); + dd->cspec->extctrl |= ((u64) dir << SYM_LSB(EXTCtrl, GPIOOe)); + new_out = (dd->cspec->gpio_out & ~mask) | out; + + qib_write_kreg(dd, kr_extctrl, dd->cspec->extctrl); + qib_write_kreg(dd, kr_gpio_out, new_out); + dd->cspec->gpio_out = new_out; + spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); + } + /* + * It is unlikely that a read at this time would get valid + * data on a pin whose direction line was set in the same + * call to this function. We include the read here because + * that allows us to potentially combine a change on one pin with + * a read on another, and because the old code did something like + * this. + */ + read_val = qib_read_kreg64(dd, kr_extstatus); + return SYM_FIELD(read_val, EXTStatus, GPIOIn); +} + +/* + * Read fundamental info we need to use the chip. These are + * the registers that describe chip capabilities, and are + * saved in shadow registers. + */ +static void get_7220_chip_params(struct qib_devdata *dd) +{ + u64 val; + u32 piobufs; + int mtu; + + dd->uregbase = qib_read_kreg32(dd, kr_userregbase); + + dd->rcvtidcnt = qib_read_kreg32(dd, kr_rcvtidcnt); + dd->rcvtidbase = qib_read_kreg32(dd, kr_rcvtidbase); + dd->rcvegrbase = qib_read_kreg32(dd, kr_rcvegrbase); + dd->palign = qib_read_kreg32(dd, kr_palign); + dd->piobufbase = qib_read_kreg64(dd, kr_sendpiobufbase); + dd->pio2k_bufbase = dd->piobufbase & 0xffffffff; + + val = qib_read_kreg64(dd, kr_sendpiosize); + dd->piosize2k = val & ~0U; + dd->piosize4k = val >> 32; + + mtu = ib_mtu_enum_to_int(qib_ibmtu); + if (mtu == -1) + mtu = QIB_DEFAULT_MTU; + dd->pport->ibmtu = (u32)mtu; + + val = qib_read_kreg64(dd, kr_sendpiobufcnt); + dd->piobcnt2k = val & ~0U; + dd->piobcnt4k = val >> 32; + /* these may be adjusted in init_chip_wc_pat() */ + dd->pio2kbase = (u32 __iomem *) + ((char __iomem *) dd->kregbase + dd->pio2k_bufbase); + if (dd->piobcnt4k) { + dd->pio4kbase = (u32 __iomem *) + ((char __iomem *) dd->kregbase + + (dd->piobufbase >> 32)); + /* + * 4K buffers take 2 pages; we use roundup just to be + * paranoid; we calculate it once here, rather than on + * ever buf allocate + */ + dd->align4k = ALIGN(dd->piosize4k, dd->palign); + } + + piobufs = dd->piobcnt4k + dd->piobcnt2k; + + dd->pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) / + (sizeof(u64) * BITS_PER_BYTE / 2); +} + +/* + * The chip base addresses in cspec and cpspec have to be set + * after possible init_chip_wc_pat(), rather than in + * qib_get_7220_chip_params(), so split out as separate function + */ +static void set_7220_baseaddrs(struct qib_devdata *dd) +{ + u32 cregbase; + /* init after possible re-map in init_chip_wc_pat() */ + cregbase = qib_read_kreg32(dd, kr_counterregbase); + dd->cspec->cregbase = (u64 __iomem *) + ((char __iomem *) dd->kregbase + cregbase); + + dd->egrtidbase = (u64 __iomem *) + ((char __iomem *) dd->kregbase + dd->rcvegrbase); +} + + +#define SENDCTRL_SHADOWED (SYM_MASK(SendCtrl, SendIntBufAvail) | \ + SYM_MASK(SendCtrl, SPioEnable) | \ + SYM_MASK(SendCtrl, SSpecialTriggerEn) | \ + SYM_MASK(SendCtrl, SendBufAvailUpd) | \ + SYM_MASK(SendCtrl, AvailUpdThld) | \ + SYM_MASK(SendCtrl, SDmaEnable) | \ + SYM_MASK(SendCtrl, SDmaIntEnable) | \ + SYM_MASK(SendCtrl, SDmaHalt) | \ + SYM_MASK(SendCtrl, SDmaSingleDescriptor)) + +static int sendctrl_hook(struct qib_devdata *dd, + const struct diag_observer *op, + u32 offs, u64 *data, u64 mask, int only_32) +{ + unsigned long flags; + unsigned idx = offs / sizeof(u64); + u64 local_data, all_bits; + + if (idx != kr_sendctrl) { + qib_dev_err(dd, "SendCtrl Hook called with offs %X, %s-bit\n", + offs, only_32 ? "32" : "64"); + return 0; + } + + all_bits = ~0ULL; + if (only_32) + all_bits >>= 32; + spin_lock_irqsave(&dd->sendctrl_lock, flags); + if ((mask & all_bits) != all_bits) { + /* + * At least some mask bits are zero, so we need + * to read. The judgement call is whether from + * reg or shadow. First-cut: read reg, and complain + * if any bits which should be shadowed are different + * from their shadowed value. + */ + if (only_32) + local_data = (u64)qib_read_kreg32(dd, idx); + else + local_data = qib_read_kreg64(dd, idx); + qib_dev_err(dd, "Sendctrl -> %X, Shad -> %X\n", + (u32)local_data, (u32)dd->sendctrl); + if ((local_data & SENDCTRL_SHADOWED) != + (dd->sendctrl & SENDCTRL_SHADOWED)) + qib_dev_err(dd, "Sendctrl read: %X shadow is %X\n", + (u32)local_data, (u32) dd->sendctrl); + *data = (local_data & ~mask) | (*data & mask); + } + if (mask) { + /* + * At least some mask bits are one, so we need + * to write, but only shadow some bits. + */ + u64 sval, tval; /* Shadowed, transient */ + + /* + * New shadow val is bits we don't want to touch, + * ORed with bits we do, that are intended for shadow. + */ + sval = (dd->sendctrl & ~mask); + sval |= *data & SENDCTRL_SHADOWED & mask; + dd->sendctrl = sval; + tval = sval | (*data & ~SENDCTRL_SHADOWED & mask); + qib_dev_err(dd, "Sendctrl <- %X, Shad <- %X\n", + (u32)tval, (u32)sval); + qib_write_kreg(dd, kr_sendctrl, tval); + qib_write_kreg(dd, kr_scratch, 0Ull); + } + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + + return only_32 ? 4 : 8; +} + +static const struct diag_observer sendctrl_observer = { + sendctrl_hook, kr_sendctrl * sizeof(u64), + kr_sendctrl * sizeof(u64) +}; + +/* + * write the final few registers that depend on some of the + * init setup. Done late in init, just before bringing up + * the serdes. + */ +static int qib_late_7220_initreg(struct qib_devdata *dd) +{ + int ret = 0; + u64 val; + + qib_write_kreg(dd, kr_rcvhdrentsize, dd->rcvhdrentsize); + qib_write_kreg(dd, kr_rcvhdrsize, dd->rcvhdrsize); + qib_write_kreg(dd, kr_rcvhdrcnt, dd->rcvhdrcnt); + qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); + val = qib_read_kreg64(dd, kr_sendpioavailaddr); + if (val != dd->pioavailregs_phys) { + qib_dev_err(dd, "Catastrophic software error, " + "SendPIOAvailAddr written as %lx, " + "read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); + ret = -EINVAL; + } + qib_register_observer(dd, &sendctrl_observer); + return ret; +} + +static int qib_init_7220_variables(struct qib_devdata *dd) +{ + struct qib_chippport_specific *cpspec; + struct qib_pportdata *ppd; + int ret = 0; + u32 sbufs, updthresh; + + cpspec = (struct qib_chippport_specific *)(dd + 1); + ppd = &cpspec->pportdata; + dd->pport = ppd; + dd->num_pports = 1; + + dd->cspec = (struct qib_chip_specific *)(cpspec + dd->num_pports); + ppd->cpspec = cpspec; + + spin_lock_init(&dd->cspec->sdepb_lock); + spin_lock_init(&dd->cspec->rcvmod_lock); + spin_lock_init(&dd->cspec->gpio_lock); + + /* we haven't yet set QIB_PRESENT, so use read directly */ + dd->revision = readq(&dd->kregbase[kr_revision]); + + if ((dd->revision & 0xffffffffU) == 0xffffffffU) { + qib_dev_err(dd, "Revision register read failure, " + "giving up initialization\n"); + ret = -ENODEV; + goto bail; + } + dd->flags |= QIB_PRESENT; /* now register routines work */ + + dd->majrev = (u8) SYM_FIELD(dd->revision, Revision_R, + ChipRevMajor); + dd->minrev = (u8) SYM_FIELD(dd->revision, Revision_R, + ChipRevMinor); + + get_7220_chip_params(dd); + qib_7220_boardname(dd); + + /* + * GPIO bits for TWSI data and clock, + * used for serial EEPROM. + */ + dd->gpio_sda_num = _QIB_GPIO_SDA_NUM; + dd->gpio_scl_num = _QIB_GPIO_SCL_NUM; + dd->twsi_eeprom_dev = QIB_TWSI_EEPROM_DEV; + + dd->flags |= QIB_HAS_INTX | QIB_HAS_LINK_LATENCY | + QIB_NODMA_RTAIL | QIB_HAS_THRESH_UPDATE; + dd->flags |= qib_special_trigger ? + QIB_USE_SPCL_TRIG : QIB_HAS_SEND_DMA; + + /* + * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. + * 2 is Some Misc, 3 is reserved for future. + */ + dd->eep_st_masks[0].hwerrs_to_log = HWE_MASK(TXEMemParityErr); + + dd->eep_st_masks[1].hwerrs_to_log = HWE_MASK(RXEMemParityErr); + + dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated); + + init_waitqueue_head(&cpspec->autoneg_wait); + INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work); + + qib_init_pportdata(ppd, dd, 0, 1); + ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; + ppd->link_speed_supported = QIB_IB_SDR | QIB_IB_DDR; + + ppd->link_width_enabled = ppd->link_width_supported; + ppd->link_speed_enabled = ppd->link_speed_supported; + /* + * Set the initial values to reasonable default, will be set + * for real when link is up. + */ + ppd->link_width_active = IB_WIDTH_4X; + ppd->link_speed_active = QIB_IB_SDR; + ppd->delay_mult = rate_to_delay[0][1]; + ppd->vls_supported = IB_VL_VL0; + ppd->vls_operational = ppd->vls_supported; + + if (!qib_mini_init) + qib_write_kreg(dd, kr_rcvbthqp, QIB_KD_QP); + + init_timer(&ppd->cpspec->chase_timer); + ppd->cpspec->chase_timer.function = reenable_7220_chase; + ppd->cpspec->chase_timer.data = (unsigned long)ppd; + + qib_num_cfg_vls = 1; /* if any 7220's, only one VL */ + + dd->rcvhdrentsize = QIB_RCVHDR_ENTSIZE; + dd->rcvhdrsize = QIB_DFLT_RCVHDRSIZE; + dd->rhf_offset = + dd->rcvhdrentsize - sizeof(u64) / sizeof(u32); + + /* we always allocate at least 2048 bytes for eager buffers */ + ret = ib_mtu_enum_to_int(qib_ibmtu); + dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; + BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); + dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); + + qib_7220_tidtemplate(dd); + + /* + * We can request a receive interrupt for 1 or + * more packets from current offset. For now, we set this + * up for a single packet. + */ + dd->rhdrhead_intr_off = 1ULL << 32; + + /* setup the stats timer; the add_timer is done at end of init */ + init_timer(&dd->stats_timer); + dd->stats_timer.function = qib_get_7220_faststats; + dd->stats_timer.data = (unsigned long) dd; + dd->stats_timer.expires = jiffies + ACTIVITY_TIMER * HZ; + + /* + * Control[4] has been added to change the arbitration within + * the SDMA engine between favoring data fetches over descriptor + * fetches. qib_sdma_fetch_arb==0 gives data fetches priority. + */ + if (qib_sdma_fetch_arb) + dd->control |= 1 << 4; + + dd->ureg_align = 0x10000; /* 64KB alignment */ + + dd->piosize2kmax_dwords = (dd->piosize2k >> 2)-1; + qib_7220_config_ctxts(dd); + qib_set_ctxtcnt(dd); /* needed for PAT setup */ + + if (qib_wc_pat) { + ret = init_chip_wc_pat(dd, 0); + if (ret) + goto bail; + } + set_7220_baseaddrs(dd); /* set chip access pointers now */ + + ret = 0; + if (qib_mini_init) + goto bail; + + ret = qib_create_ctxts(dd); + init_7220_cntrnames(dd); + + /* use all of 4KB buffers for the kernel SDMA, zero if !SDMA. + * reserve the update threshold amount for other kernel use, such + * as sending SMI, MAD, and ACKs, or 3, whichever is greater, + * unless we aren't enabling SDMA, in which case we want to use + * all the 4k bufs for the kernel. + * if this was less than the update threshold, we could wait + * a long time for an update. Coded this way because we + * sometimes change the update threshold for various reasons, + * and we want this to remain robust. + */ + updthresh = 8U; /* update threshold */ + if (dd->flags & QIB_HAS_SEND_DMA) { + dd->cspec->sdmabufcnt = dd->piobcnt4k; + sbufs = updthresh > 3 ? updthresh : 3; + } else { + dd->cspec->sdmabufcnt = 0; + sbufs = dd->piobcnt4k; + } + + dd->cspec->lastbuf_for_pio = dd->piobcnt2k + dd->piobcnt4k - + dd->cspec->sdmabufcnt; + dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs; + dd->cspec->lastbuf_for_pio--; /* range is <= , not < */ + dd->pbufsctxt = dd->lastctxt_piobuf / + (dd->cfgctxts - dd->first_user_ctxt); + + /* + * if we are at 16 user contexts, we will have one 7 sbufs + * per context, so drop the update threshold to match. We + * want to update before we actually run out, at low pbufs/ctxt + * so give ourselves some margin + */ + if ((dd->pbufsctxt - 2) < updthresh) + updthresh = dd->pbufsctxt - 2; + + dd->cspec->updthresh_dflt = updthresh; + dd->cspec->updthresh = updthresh; + + /* before full enable, no interrupts, no locking needed */ + dd->sendctrl |= (updthresh & SYM_RMASK(SendCtrl, AvailUpdThld)) + << SYM_LSB(SendCtrl, AvailUpdThld); + + dd->psxmitwait_supported = 1; + dd->psxmitwait_check_rate = QIB_7220_PSXMITWAIT_CHECK_RATE; +bail: + return ret; +} + +static u32 __iomem *qib_7220_getsendbuf(struct qib_pportdata *ppd, u64 pbc, + u32 *pbufnum) +{ + u32 first, last, plen = pbc & QIB_PBC_LENGTH_MASK; + struct qib_devdata *dd = ppd->dd; + u32 __iomem *buf; + + if (((pbc >> 32) & PBC_7220_VL15_SEND_CTRL) && + !(ppd->lflags & (QIBL_IB_AUTONEG_INPROG | QIBL_LINKACTIVE))) + buf = get_7220_link_buf(ppd, pbufnum); + else { + if ((plen + 1) > dd->piosize2kmax_dwords) + first = dd->piobcnt2k; + else + first = 0; + /* try 4k if all 2k busy, so same last for both sizes */ + last = dd->cspec->lastbuf_for_pio; + buf = qib_getsendbuf_range(dd, pbufnum, first, last); + } + return buf; +} + +/* these 2 "counters" are really control registers, and are always RW */ +static void qib_set_cntr_7220_sample(struct qib_pportdata *ppd, u32 intv, + u32 start) +{ + write_7220_creg(ppd->dd, cr_psinterval, intv); + write_7220_creg(ppd->dd, cr_psstart, start); +} + +/* + * NOTE: no real attempt is made to generalize the SDMA stuff. + * At some point "soon" we will have a new more generalized + * set of sdma interface, and then we'll clean this up. + */ + +/* Must be called with sdma_lock held, or before init finished */ +static void qib_sdma_update_7220_tail(struct qib_pportdata *ppd, u16 tail) +{ + /* Commit writes to memory and advance the tail on the chip */ + wmb(); + ppd->sdma_descq_tail = tail; + qib_write_kreg(ppd->dd, kr_senddmatail, tail); +} + +static void qib_sdma_set_7220_desc_cnt(struct qib_pportdata *ppd, unsigned cnt) +{ +} + +static struct sdma_set_state_action sdma_7220_action_table[] = { + [qib_sdma_state_s00_hw_down] = { + .op_enable = 0, + .op_intenable = 0, + .op_halt = 0, + .go_s99_running_tofalse = 1, + }, + [qib_sdma_state_s10_hw_start_up_wait] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 1, + }, + [qib_sdma_state_s20_idle] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 1, + }, + [qib_sdma_state_s30_sw_clean_up_wait] = { + .op_enable = 0, + .op_intenable = 1, + .op_halt = 0, + }, + [qib_sdma_state_s40_hw_clean_up_wait] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 1, + }, + [qib_sdma_state_s50_hw_halt_wait] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 1, + }, + [qib_sdma_state_s99_running] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 0, + .go_s99_running_totrue = 1, + }, +}; + +static void qib_7220_sdma_init_early(struct qib_pportdata *ppd) +{ + ppd->sdma_state.set_state_action = sdma_7220_action_table; +} + +static int init_sdma_7220_regs(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + unsigned i, n; + u64 senddmabufmask[3] = { 0 }; + + /* Set SendDmaBase */ + qib_write_kreg(dd, kr_senddmabase, ppd->sdma_descq_phys); + qib_sdma_7220_setlengen(ppd); + qib_sdma_update_7220_tail(ppd, 0); /* Set SendDmaTail */ + /* Set SendDmaHeadAddr */ + qib_write_kreg(dd, kr_senddmaheadaddr, ppd->sdma_head_phys); + + /* + * Reserve all the former "kernel" piobufs, using high number range + * so we get as many 4K buffers as possible + */ + n = dd->piobcnt2k + dd->piobcnt4k; + i = n - dd->cspec->sdmabufcnt; + + for (; i < n; ++i) { + unsigned word = i / 64; + unsigned bit = i & 63; + + BUG_ON(word >= 3); + senddmabufmask[word] |= 1ULL << bit; + } + qib_write_kreg(dd, kr_senddmabufmask0, senddmabufmask[0]); + qib_write_kreg(dd, kr_senddmabufmask1, senddmabufmask[1]); + qib_write_kreg(dd, kr_senddmabufmask2, senddmabufmask[2]); + + ppd->sdma_state.first_sendbuf = i; + ppd->sdma_state.last_sendbuf = n; + + return 0; +} + +/* sdma_lock must be held */ +static u16 qib_sdma_7220_gethead(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + int sane; + int use_dmahead; + u16 swhead; + u16 swtail; + u16 cnt; + u16 hwhead; + + use_dmahead = __qib_sdma_running(ppd) && + (dd->flags & QIB_HAS_SDMA_TIMEOUT); +retry: + hwhead = use_dmahead ? + (u16)le64_to_cpu(*ppd->sdma_head_dma) : + (u16)qib_read_kreg32(dd, kr_senddmahead); + + swhead = ppd->sdma_descq_head; + swtail = ppd->sdma_descq_tail; + cnt = ppd->sdma_descq_cnt; + + if (swhead < swtail) { + /* not wrapped */ + sane = (hwhead >= swhead) & (hwhead <= swtail); + } else if (swhead > swtail) { + /* wrapped around */ + sane = ((hwhead >= swhead) && (hwhead < cnt)) || + (hwhead <= swtail); + } else { + /* empty */ + sane = (hwhead == swhead); + } + + if (unlikely(!sane)) { + if (use_dmahead) { + /* try one more time, directly from the register */ + use_dmahead = 0; + goto retry; + } + /* assume no progress */ + hwhead = swhead; + } + + return hwhead; +} + +static int qib_sdma_7220_busy(struct qib_pportdata *ppd) +{ + u64 hwstatus = qib_read_kreg64(ppd->dd, kr_senddmastatus); + + return (hwstatus & SYM_MASK(SendDmaStatus, ScoreBoardDrainInProg)) || + (hwstatus & SYM_MASK(SendDmaStatus, AbortInProg)) || + (hwstatus & SYM_MASK(SendDmaStatus, InternalSDmaEnable)) || + !(hwstatus & SYM_MASK(SendDmaStatus, ScbEmpty)); +} + +/* + * Compute the amount of delay before sending the next packet if the + * port's send rate differs from the static rate set for the QP. + * Since the delay affects this packet but the amount of the delay is + * based on the length of the previous packet, use the last delay computed + * and save the delay count for this packet to be used next time + * we get here. + */ +static u32 qib_7220_setpbc_control(struct qib_pportdata *ppd, u32 plen, + u8 srate, u8 vl) +{ + u8 snd_mult = ppd->delay_mult; + u8 rcv_mult = ib_rate_to_delay[srate]; + u32 ret = ppd->cpspec->last_delay_mult; + + ppd->cpspec->last_delay_mult = (rcv_mult > snd_mult) ? + (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0; + + /* Indicate VL15, if necessary */ + if (vl == 15) + ret |= PBC_7220_VL15_SEND_CTRL; + return ret; +} + +static void qib_7220_initvl15_bufs(struct qib_devdata *dd) +{ +} + +static void qib_7220_init_ctxt(struct qib_ctxtdata *rcd) +{ + if (!rcd->ctxt) { + rcd->rcvegrcnt = IBA7220_KRCVEGRCNT; + rcd->rcvegr_tid_base = 0; + } else { + rcd->rcvegrcnt = rcd->dd->cspec->rcvegrcnt; + rcd->rcvegr_tid_base = IBA7220_KRCVEGRCNT + + (rcd->ctxt - 1) * rcd->rcvegrcnt; + } +} + +static void qib_7220_txchk_change(struct qib_devdata *dd, u32 start, + u32 len, u32 which, struct qib_ctxtdata *rcd) +{ + int i; + unsigned long flags; + + switch (which) { + case TXCHK_CHG_TYPE_KERN: + /* see if we need to raise avail update threshold */ + spin_lock_irqsave(&dd->uctxt_lock, flags); + for (i = dd->first_user_ctxt; + dd->cspec->updthresh != dd->cspec->updthresh_dflt + && i < dd->cfgctxts; i++) + if (dd->rcd[i] && dd->rcd[i]->subctxt_cnt && + ((dd->rcd[i]->piocnt / dd->rcd[i]->subctxt_cnt) - 1) + < dd->cspec->updthresh_dflt) + break; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + if (i == dd->cfgctxts) { + spin_lock_irqsave(&dd->sendctrl_lock, flags); + dd->cspec->updthresh = dd->cspec->updthresh_dflt; + dd->sendctrl &= ~SYM_MASK(SendCtrl, AvailUpdThld); + dd->sendctrl |= (dd->cspec->updthresh & + SYM_RMASK(SendCtrl, AvailUpdThld)) << + SYM_LSB(SendCtrl, AvailUpdThld); + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + sendctrl_7220_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP); + } + break; + case TXCHK_CHG_TYPE_USER: + spin_lock_irqsave(&dd->sendctrl_lock, flags); + if (rcd && rcd->subctxt_cnt && ((rcd->piocnt + / rcd->subctxt_cnt) - 1) < dd->cspec->updthresh) { + dd->cspec->updthresh = (rcd->piocnt / + rcd->subctxt_cnt) - 1; + dd->sendctrl &= ~SYM_MASK(SendCtrl, AvailUpdThld); + dd->sendctrl |= (dd->cspec->updthresh & + SYM_RMASK(SendCtrl, AvailUpdThld)) + << SYM_LSB(SendCtrl, AvailUpdThld); + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + sendctrl_7220_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP); + } else + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + break; + } +} + +static void writescratch(struct qib_devdata *dd, u32 val) +{ + qib_write_kreg(dd, kr_scratch, val); +} + +#define VALID_TS_RD_REG_MASK 0xBF +/** + * qib_7220_tempsense_read - read register of temp sensor via TWSI + * @dd: the qlogic_ib device + * @regnum: register to read from + * + * returns reg contents (0..255) or < 0 for error + */ +static int qib_7220_tempsense_rd(struct qib_devdata *dd, int regnum) +{ + int ret; + u8 rdata; + + if (regnum > 7) { + ret = -EINVAL; + goto bail; + } + + /* return a bogus value for (the one) register we do not have */ + if (!((1 << regnum) & VALID_TS_RD_REG_MASK)) { + ret = 0; + goto bail; + } + + ret = mutex_lock_interruptible(&dd->eep_lock); + if (ret) + goto bail; + + ret = qib_twsi_blk_rd(dd, QIB_TWSI_TEMP_DEV, regnum, &rdata, 1); + if (!ret) + ret = rdata; + + mutex_unlock(&dd->eep_lock); + + /* + * There are three possibilities here: + * ret is actual value (0..255) + * ret is -ENXIO or -EINVAL from twsi code or this file + * ret is -EINTR from mutex_lock_interruptible. + */ +bail: + return ret; +} + +/* Dummy function, as 7220 boards never disable EEPROM Write */ +static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen) +{ + return 1; +} + +/** + * qib_init_iba7220_funcs - set up the chip-specific function pointers + * @dev: the pci_dev for qlogic_ib device + * @ent: pci_device_id struct for this dev + * + * This is global, and is called directly at init to set up the + * chip-specific function pointers for later use. + */ +struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct qib_devdata *dd; + int ret; + u32 boardid, minwidth; + + dd = qib_alloc_devdata(pdev, sizeof(struct qib_chip_specific) + + sizeof(struct qib_chippport_specific)); + if (IS_ERR(dd)) + goto bail; + + dd->f_bringup_serdes = qib_7220_bringup_serdes; + dd->f_cleanup = qib_setup_7220_cleanup; + dd->f_clear_tids = qib_7220_clear_tids; + dd->f_free_irq = qib_7220_free_irq; + dd->f_get_base_info = qib_7220_get_base_info; + dd->f_get_msgheader = qib_7220_get_msgheader; + dd->f_getsendbuf = qib_7220_getsendbuf; + dd->f_gpio_mod = gpio_7220_mod; + dd->f_eeprom_wen = qib_7220_eeprom_wen; + dd->f_hdrqempty = qib_7220_hdrqempty; + dd->f_ib_updown = qib_7220_ib_updown; + dd->f_init_ctxt = qib_7220_init_ctxt; + dd->f_initvl15_bufs = qib_7220_initvl15_bufs; + dd->f_intr_fallback = qib_7220_intr_fallback; + dd->f_late_initreg = qib_late_7220_initreg; + dd->f_setpbc_control = qib_7220_setpbc_control; + dd->f_portcntr = qib_portcntr_7220; + dd->f_put_tid = qib_7220_put_tid; + dd->f_quiet_serdes = qib_7220_quiet_serdes; + dd->f_rcvctrl = rcvctrl_7220_mod; + dd->f_read_cntrs = qib_read_7220cntrs; + dd->f_read_portcntrs = qib_read_7220portcntrs; + dd->f_reset = qib_setup_7220_reset; + dd->f_init_sdma_regs = init_sdma_7220_regs; + dd->f_sdma_busy = qib_sdma_7220_busy; + dd->f_sdma_gethead = qib_sdma_7220_gethead; + dd->f_sdma_sendctrl = qib_7220_sdma_sendctrl; + dd->f_sdma_set_desc_cnt = qib_sdma_set_7220_desc_cnt; + dd->f_sdma_update_tail = qib_sdma_update_7220_tail; + dd->f_sdma_hw_clean_up = qib_7220_sdma_hw_clean_up; + dd->f_sdma_hw_start_up = qib_7220_sdma_hw_start_up; + dd->f_sdma_init_early = qib_7220_sdma_init_early; + dd->f_sendctrl = sendctrl_7220_mod; + dd->f_set_armlaunch = qib_set_7220_armlaunch; + dd->f_set_cntr_sample = qib_set_cntr_7220_sample; + dd->f_iblink_state = qib_7220_iblink_state; + dd->f_ibphys_portstate = qib_7220_phys_portstate; + dd->f_get_ib_cfg = qib_7220_get_ib_cfg; + dd->f_set_ib_cfg = qib_7220_set_ib_cfg; + dd->f_set_ib_loopback = qib_7220_set_loopback; + dd->f_set_intr_state = qib_7220_set_intr_state; + dd->f_setextled = qib_setup_7220_setextled; + dd->f_txchk_change = qib_7220_txchk_change; + dd->f_update_usrhead = qib_update_7220_usrhead; + dd->f_wantpiobuf_intr = qib_wantpiobuf_7220_intr; + dd->f_xgxs_reset = qib_7220_xgxs_reset; + dd->f_writescratch = writescratch; + dd->f_tempsense_rd = qib_7220_tempsense_rd; + /* + * Do remaining pcie setup and save pcie values in dd. + * Any error printing is already done by the init code. + * On return, we have the chip mapped, but chip registers + * are not set up until start of qib_init_7220_variables. + */ + ret = qib_pcie_ddinit(dd, pdev, ent); + if (ret < 0) + goto bail_free; + + /* initialize chip-specific variables */ + ret = qib_init_7220_variables(dd); + if (ret) + goto bail_cleanup; + + if (qib_mini_init) + goto bail; + + boardid = SYM_FIELD(dd->revision, Revision, + BoardID); + switch (boardid) { + case 0: + case 2: + case 10: + case 12: + minwidth = 16; /* x16 capable boards */ + break; + default: + minwidth = 8; /* x8 capable boards */ + break; + } + if (qib_pcie_params(dd, minwidth, NULL, NULL)) + qib_dev_err(dd, "Failed to setup PCIe or interrupts; " + "continuing anyway\n"); + + /* save IRQ for possible later use */ + dd->cspec->irq = pdev->irq; + + if (qib_read_kreg64(dd, kr_hwerrstatus) & + QLOGIC_IB_HWE_SERDESPLLFAILED) + qib_write_kreg(dd, kr_hwerrclear, + QLOGIC_IB_HWE_SERDESPLLFAILED); + + /* setup interrupt handler (interrupt type handled above) */ + qib_setup_7220_interrupt(dd); + qib_7220_init_hwerrors(dd); + + /* clear diagctrl register, in case diags were running and crashed */ + qib_write_kreg(dd, kr_hwdiagctrl, 0); + + goto bail; + +bail_cleanup: + qib_pcie_ddcleanup(dd); +bail_free: + qib_free_devdata(dd); + dd = ERR_PTR(ret); +bail: + return dd; +} diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c new file mode 100644 index 00000000..060b9606 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -0,0 +1,8111 @@ +/* + * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This file contains all of the code that is specific to the + * InfiniPath 7322 chip + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "qib.h" +#include "qib_7322_regs.h" +#include "qib_qsfp.h" + +#include "qib_mad.h" + +static void qib_setup_7322_setextled(struct qib_pportdata *, u32); +static void qib_7322_handle_hwerrors(struct qib_devdata *, char *, size_t); +static void sendctrl_7322_mod(struct qib_pportdata *ppd, u32 op); +static irqreturn_t qib_7322intr(int irq, void *data); +static irqreturn_t qib_7322bufavail(int irq, void *data); +static irqreturn_t sdma_intr(int irq, void *data); +static irqreturn_t sdma_idle_intr(int irq, void *data); +static irqreturn_t sdma_progress_intr(int irq, void *data); +static irqreturn_t sdma_cleanup_intr(int irq, void *data); +static void qib_7322_txchk_change(struct qib_devdata *, u32, u32, u32, + struct qib_ctxtdata *rcd); +static u8 qib_7322_phys_portstate(u64); +static u32 qib_7322_iblink_state(u64); +static void qib_set_ib_7322_lstate(struct qib_pportdata *ppd, u16 linkcmd, + u16 linitcmd); +static void force_h1(struct qib_pportdata *); +static void adj_tx_serdes(struct qib_pportdata *); +static u32 qib_7322_setpbc_control(struct qib_pportdata *, u32, u8, u8); +static void qib_7322_mini_pcs_reset(struct qib_pportdata *); + +static u32 ahb_mod(struct qib_devdata *, int, int, int, u32, u32); +static void ibsd_wr_allchans(struct qib_pportdata *, int, unsigned, unsigned); +static void serdes_7322_los_enable(struct qib_pportdata *, int); +static int serdes_7322_init_old(struct qib_pportdata *); +static int serdes_7322_init_new(struct qib_pportdata *); + +#define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb)) + +/* LE2 serdes values for different cases */ +#define LE2_DEFAULT 5 +#define LE2_5m 4 +#define LE2_QME 0 + +/* Below is special-purpose, so only really works for the IB SerDes blocks. */ +#define IBSD(hw_pidx) (hw_pidx + 2) + +/* these are variables for documentation and experimentation purposes */ +static const unsigned rcv_int_timeout = 375; +static const unsigned rcv_int_count = 16; +static const unsigned sdma_idle_cnt = 64; + +/* Time to stop altering Rx Equalization parameters, after link up. */ +#define RXEQ_DISABLE_MSECS 2500 + +/* + * Number of VLs we are configured to use (to allow for more + * credits per vl, etc.) + */ +ushort qib_num_cfg_vls = 2; +module_param_named(num_vls, qib_num_cfg_vls, ushort, S_IRUGO); +MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)"); + +static ushort qib_chase = 1; +module_param_named(chase, qib_chase, ushort, S_IRUGO); +MODULE_PARM_DESC(chase, "Enable state chase handling"); + +static ushort qib_long_atten = 10; /* 10 dB ~= 5m length */ +module_param_named(long_attenuation, qib_long_atten, ushort, S_IRUGO); +MODULE_PARM_DESC(long_attenuation, \ + "attenuation cutoff (dB) for long copper cable setup"); + +static ushort qib_singleport; +module_param_named(singleport, qib_singleport, ushort, S_IRUGO); +MODULE_PARM_DESC(singleport, "Use only IB port 1; more per-port buffer space"); + +static ushort qib_krcvq01_no_msi; +module_param_named(krcvq01_no_msi, qib_krcvq01_no_msi, ushort, S_IRUGO); +MODULE_PARM_DESC(krcvq01_no_msi, "No MSI for kctx < 2"); + +/* + * Receive header queue sizes + */ +static unsigned qib_rcvhdrcnt; +module_param_named(rcvhdrcnt, qib_rcvhdrcnt, uint, S_IRUGO); +MODULE_PARM_DESC(rcvhdrcnt, "receive header count"); + +static unsigned qib_rcvhdrsize; +module_param_named(rcvhdrsize, qib_rcvhdrsize, uint, S_IRUGO); +MODULE_PARM_DESC(rcvhdrsize, "receive header size in 32-bit words"); + +static unsigned qib_rcvhdrentsize; +module_param_named(rcvhdrentsize, qib_rcvhdrentsize, uint, S_IRUGO); +MODULE_PARM_DESC(rcvhdrentsize, "receive header entry size in 32-bit words"); + +#define MAX_ATTEN_LEN 64 /* plenty for any real system */ +/* for read back, default index is ~5m copper cable */ +static char txselect_list[MAX_ATTEN_LEN] = "10"; +static struct kparam_string kp_txselect = { + .string = txselect_list, + .maxlen = MAX_ATTEN_LEN +}; +static int setup_txselect(const char *, struct kernel_param *); +module_param_call(txselect, setup_txselect, param_get_string, + &kp_txselect, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(txselect, \ + "Tx serdes indices (for no QSFP or invalid QSFP data)"); + +#define BOARD_QME7342 5 +#define BOARD_QMH7342 6 +#define IS_QMH(dd) (SYM_FIELD((dd)->revision, Revision, BoardID) == \ + BOARD_QMH7342) +#define IS_QME(dd) (SYM_FIELD((dd)->revision, Revision, BoardID) == \ + BOARD_QME7342) + +#define KREG_IDX(regname) (QIB_7322_##regname##_OFFS / sizeof(u64)) + +#define KREG_IBPORT_IDX(regname) ((QIB_7322_##regname##_0_OFFS / sizeof(u64))) + +#define MASK_ACROSS(lsb, msb) \ + (((1ULL << ((msb) + 1 - (lsb))) - 1) << (lsb)) + +#define SYM_RMASK(regname, fldname) ((u64) \ + QIB_7322_##regname##_##fldname##_RMASK) + +#define SYM_MASK(regname, fldname) ((u64) \ + QIB_7322_##regname##_##fldname##_RMASK << \ + QIB_7322_##regname##_##fldname##_LSB) + +#define SYM_FIELD(value, regname, fldname) ((u64) \ + (((value) >> SYM_LSB(regname, fldname)) & \ + SYM_RMASK(regname, fldname))) + +/* useful for things like LaFifoEmpty_0...7, TxCreditOK_0...7, etc. */ +#define SYM_FIELD_ACROSS(value, regname, fldname, nbits) \ + (((value) >> SYM_LSB(regname, fldname)) & MASK_ACROSS(0, nbits)) + +#define HWE_MASK(fldname) SYM_MASK(HwErrMask, fldname##Mask) +#define ERR_MASK(fldname) SYM_MASK(ErrMask, fldname##Mask) +#define ERR_MASK_N(fldname) SYM_MASK(ErrMask_0, fldname##Mask) +#define INT_MASK(fldname) SYM_MASK(IntMask, fldname##IntMask) +#define INT_MASK_P(fldname, port) SYM_MASK(IntMask, fldname##IntMask##_##port) +/* Below because most, but not all, fields of IntMask have that full suffix */ +#define INT_MASK_PM(fldname, port) SYM_MASK(IntMask, fldname##Mask##_##port) + + +#define SYM_LSB(regname, fldname) (QIB_7322_##regname##_##fldname##_LSB) + +/* + * the size bits give us 2^N, in KB units. 0 marks as invalid, + * and 7 is reserved. We currently use only 2KB and 4KB + */ +#define IBA7322_TID_SZ_SHIFT QIB_7322_RcvTIDArray0_RT_BufSize_LSB +#define IBA7322_TID_SZ_2K (1UL<kregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readl(regno + (u64 __iomem *)( + (dd->ureg_align * ctxt) + (dd->userbase ? + (char __iomem *)dd->userbase : + (char __iomem *)dd->kregbase + dd->uregbase))); +} + +/** + * qib_read_ureg - read virtualized per-context register + * @dd: device + * @regno: register number + * @ctxt: context number + * + * Return the contents of a register that is virtualized to be per context. + * Returns -1 on errors (not distinguishable from valid contents at + * runtime; we may add a separate error variable at some point). + */ +static inline u64 qib_read_ureg(const struct qib_devdata *dd, + enum qib_ureg regno, int ctxt) +{ + + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readq(regno + (u64 __iomem *)( + (dd->ureg_align * ctxt) + (dd->userbase ? + (char __iomem *)dd->userbase : + (char __iomem *)dd->kregbase + dd->uregbase))); +} + +/** + * qib_write_ureg - write virtualized per-context register + * @dd: device + * @regno: register number + * @value: value + * @ctxt: context + * + * Write the contents of a register that is virtualized to be per context. + */ +static inline void qib_write_ureg(const struct qib_devdata *dd, + enum qib_ureg regno, u64 value, int ctxt) +{ + u64 __iomem *ubase; + if (dd->userbase) + ubase = (u64 __iomem *) + ((char __iomem *) dd->userbase + + dd->ureg_align * ctxt); + else + ubase = (u64 __iomem *) + (dd->uregbase + + (char __iomem *) dd->kregbase + + dd->ureg_align * ctxt); + + if (dd->kregbase && (dd->flags & QIB_PRESENT)) + writeq(value, &ubase[regno]); +} + +static inline u32 qib_read_kreg32(const struct qib_devdata *dd, + const u32 regno) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return -1; + return readl((u32 __iomem *) &dd->kregbase[regno]); +} + +static inline u64 qib_read_kreg64(const struct qib_devdata *dd, + const u32 regno) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return -1; + return readq(&dd->kregbase[regno]); +} + +static inline void qib_write_kreg(const struct qib_devdata *dd, + const u32 regno, u64 value) +{ + if (dd->kregbase && (dd->flags & QIB_PRESENT)) + writeq(value, &dd->kregbase[regno]); +} + +/* + * not many sanity checks for the port-specific kernel register routines, + * since they are only used when it's known to be safe. +*/ +static inline u64 qib_read_kreg_port(const struct qib_pportdata *ppd, + const u16 regno) +{ + if (!ppd->cpspec->kpregbase || !(ppd->dd->flags & QIB_PRESENT)) + return 0ULL; + return readq(&ppd->cpspec->kpregbase[regno]); +} + +static inline void qib_write_kreg_port(const struct qib_pportdata *ppd, + const u16 regno, u64 value) +{ + if (ppd->cpspec && ppd->dd && ppd->cpspec->kpregbase && + (ppd->dd->flags & QIB_PRESENT)) + writeq(value, &ppd->cpspec->kpregbase[regno]); +} + +/** + * qib_write_kreg_ctxt - write a device's per-ctxt 64-bit kernel register + * @dd: the qlogic_ib device + * @regno: the register number to write + * @ctxt: the context containing the register + * @value: the value to write + */ +static inline void qib_write_kreg_ctxt(const struct qib_devdata *dd, + const u16 regno, unsigned ctxt, + u64 value) +{ + qib_write_kreg(dd, regno + ctxt, value); +} + +static inline u64 read_7322_creg(const struct qib_devdata *dd, u16 regno) +{ + if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readq(&dd->cspec->cregbase[regno]); + + +} + +static inline u32 read_7322_creg32(const struct qib_devdata *dd, u16 regno) +{ + if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readl(&dd->cspec->cregbase[regno]); + + +} + +static inline void write_7322_creg_port(const struct qib_pportdata *ppd, + u16 regno, u64 value) +{ + if (ppd->cpspec && ppd->cpspec->cpregbase && + (ppd->dd->flags & QIB_PRESENT)) + writeq(value, &ppd->cpspec->cpregbase[regno]); +} + +static inline u64 read_7322_creg_port(const struct qib_pportdata *ppd, + u16 regno) +{ + if (!ppd->cpspec || !ppd->cpspec->cpregbase || + !(ppd->dd->flags & QIB_PRESENT)) + return 0; + return readq(&ppd->cpspec->cpregbase[regno]); +} + +static inline u32 read_7322_creg32_port(const struct qib_pportdata *ppd, + u16 regno) +{ + if (!ppd->cpspec || !ppd->cpspec->cpregbase || + !(ppd->dd->flags & QIB_PRESENT)) + return 0; + return readl(&ppd->cpspec->cpregbase[regno]); +} + +/* bits in Control register */ +#define QLOGIC_IB_C_RESET SYM_MASK(Control, SyncReset) +#define QLOGIC_IB_C_SDMAFETCHPRIOEN SYM_MASK(Control, SDmaDescFetchPriorityEn) + +/* bits in general interrupt regs */ +#define QIB_I_RCVURG_LSB SYM_LSB(IntMask, RcvUrg0IntMask) +#define QIB_I_RCVURG_RMASK MASK_ACROSS(0, 17) +#define QIB_I_RCVURG_MASK (QIB_I_RCVURG_RMASK << QIB_I_RCVURG_LSB) +#define QIB_I_RCVAVAIL_LSB SYM_LSB(IntMask, RcvAvail0IntMask) +#define QIB_I_RCVAVAIL_RMASK MASK_ACROSS(0, 17) +#define QIB_I_RCVAVAIL_MASK (QIB_I_RCVAVAIL_RMASK << QIB_I_RCVAVAIL_LSB) +#define QIB_I_C_ERROR INT_MASK(Err) + +#define QIB_I_SPIOSENT (INT_MASK_P(SendDone, 0) | INT_MASK_P(SendDone, 1)) +#define QIB_I_SPIOBUFAVAIL INT_MASK(SendBufAvail) +#define QIB_I_GPIO INT_MASK(AssertGPIO) +#define QIB_I_P_SDMAINT(pidx) \ + (INT_MASK_P(SDma, pidx) | INT_MASK_P(SDmaIdle, pidx) | \ + INT_MASK_P(SDmaProgress, pidx) | \ + INT_MASK_PM(SDmaCleanupDone, pidx)) + +/* Interrupt bits that are "per port" */ +#define QIB_I_P_BITSEXTANT(pidx) \ + (INT_MASK_P(Err, pidx) | INT_MASK_P(SendDone, pidx) | \ + INT_MASK_P(SDma, pidx) | INT_MASK_P(SDmaIdle, pidx) | \ + INT_MASK_P(SDmaProgress, pidx) | \ + INT_MASK_PM(SDmaCleanupDone, pidx)) + +/* Interrupt bits that are common to a device */ +/* currently unused: QIB_I_SPIOSENT */ +#define QIB_I_C_BITSEXTANT \ + (QIB_I_RCVURG_MASK | QIB_I_RCVAVAIL_MASK | \ + QIB_I_SPIOSENT | \ + QIB_I_C_ERROR | QIB_I_SPIOBUFAVAIL | QIB_I_GPIO) + +#define QIB_I_BITSEXTANT (QIB_I_C_BITSEXTANT | \ + QIB_I_P_BITSEXTANT(0) | QIB_I_P_BITSEXTANT(1)) + +/* + * Error bits that are "per port". + */ +#define QIB_E_P_IBSTATUSCHANGED ERR_MASK_N(IBStatusChanged) +#define QIB_E_P_SHDR ERR_MASK_N(SHeadersErr) +#define QIB_E_P_VL15_BUF_MISUSE ERR_MASK_N(VL15BufMisuseErr) +#define QIB_E_P_SND_BUF_MISUSE ERR_MASK_N(SendBufMisuseErr) +#define QIB_E_P_SUNSUPVL ERR_MASK_N(SendUnsupportedVLErr) +#define QIB_E_P_SUNEXP_PKTNUM ERR_MASK_N(SendUnexpectedPktNumErr) +#define QIB_E_P_SDROP_DATA ERR_MASK_N(SendDroppedDataPktErr) +#define QIB_E_P_SDROP_SMP ERR_MASK_N(SendDroppedSmpPktErr) +#define QIB_E_P_SPKTLEN ERR_MASK_N(SendPktLenErr) +#define QIB_E_P_SUNDERRUN ERR_MASK_N(SendUnderRunErr) +#define QIB_E_P_SMAXPKTLEN ERR_MASK_N(SendMaxPktLenErr) +#define QIB_E_P_SMINPKTLEN ERR_MASK_N(SendMinPktLenErr) +#define QIB_E_P_RIBLOSTLINK ERR_MASK_N(RcvIBLostLinkErr) +#define QIB_E_P_RHDR ERR_MASK_N(RcvHdrErr) +#define QIB_E_P_RHDRLEN ERR_MASK_N(RcvHdrLenErr) +#define QIB_E_P_RBADTID ERR_MASK_N(RcvBadTidErr) +#define QIB_E_P_RBADVERSION ERR_MASK_N(RcvBadVersionErr) +#define QIB_E_P_RIBFLOW ERR_MASK_N(RcvIBFlowErr) +#define QIB_E_P_REBP ERR_MASK_N(RcvEBPErr) +#define QIB_E_P_RUNSUPVL ERR_MASK_N(RcvUnsupportedVLErr) +#define QIB_E_P_RUNEXPCHAR ERR_MASK_N(RcvUnexpectedCharErr) +#define QIB_E_P_RSHORTPKTLEN ERR_MASK_N(RcvShortPktLenErr) +#define QIB_E_P_RLONGPKTLEN ERR_MASK_N(RcvLongPktLenErr) +#define QIB_E_P_RMAXPKTLEN ERR_MASK_N(RcvMaxPktLenErr) +#define QIB_E_P_RMINPKTLEN ERR_MASK_N(RcvMinPktLenErr) +#define QIB_E_P_RICRC ERR_MASK_N(RcvICRCErr) +#define QIB_E_P_RVCRC ERR_MASK_N(RcvVCRCErr) +#define QIB_E_P_RFORMATERR ERR_MASK_N(RcvFormatErr) + +#define QIB_E_P_SDMA1STDESC ERR_MASK_N(SDma1stDescErr) +#define QIB_E_P_SDMABASE ERR_MASK_N(SDmaBaseErr) +#define QIB_E_P_SDMADESCADDRMISALIGN ERR_MASK_N(SDmaDescAddrMisalignErr) +#define QIB_E_P_SDMADWEN ERR_MASK_N(SDmaDwEnErr) +#define QIB_E_P_SDMAGENMISMATCH ERR_MASK_N(SDmaGenMismatchErr) +#define QIB_E_P_SDMAHALT ERR_MASK_N(SDmaHaltErr) +#define QIB_E_P_SDMAMISSINGDW ERR_MASK_N(SDmaMissingDwErr) +#define QIB_E_P_SDMAOUTOFBOUND ERR_MASK_N(SDmaOutOfBoundErr) +#define QIB_E_P_SDMARPYTAG ERR_MASK_N(SDmaRpyTagErr) +#define QIB_E_P_SDMATAILOUTOFBOUND ERR_MASK_N(SDmaTailOutOfBoundErr) +#define QIB_E_P_SDMAUNEXPDATA ERR_MASK_N(SDmaUnexpDataErr) + +/* Error bits that are common to a device */ +#define QIB_E_RESET ERR_MASK(ResetNegated) +#define QIB_E_HARDWARE ERR_MASK(HardwareErr) +#define QIB_E_INVALIDADDR ERR_MASK(InvalidAddrErr) + + +/* + * Per chip (rather than per-port) errors. Most either do + * nothing but trigger a print (because they self-recover, or + * always occur in tandem with other errors that handle the + * issue), or because they indicate errors with no recovery, + * but we want to know that they happened. + */ +#define QIB_E_SBUF_VL15_MISUSE ERR_MASK(SBufVL15MisUseErr) +#define QIB_E_BADEEP ERR_MASK(InvalidEEPCmd) +#define QIB_E_VLMISMATCH ERR_MASK(SendVLMismatchErr) +#define QIB_E_ARMLAUNCH ERR_MASK(SendArmLaunchErr) +#define QIB_E_SPCLTRIG ERR_MASK(SendSpecialTriggerErr) +#define QIB_E_RRCVHDRFULL ERR_MASK(RcvHdrFullErr) +#define QIB_E_RRCVEGRFULL ERR_MASK(RcvEgrFullErr) +#define QIB_E_RCVCTXTSHARE ERR_MASK(RcvContextShareErr) + +/* SDMA chip errors (not per port) + * QIB_E_SDMA_BUF_DUP needs no special handling, because we will also get + * the SDMAHALT error immediately, so we just print the dup error via the + * E_AUTO mechanism. This is true of most of the per-port fatal errors + * as well, but since this is port-independent, by definition, it's + * handled a bit differently. SDMA_VL15 and SDMA_WRONG_PORT are per + * packet send errors, and so are handled in the same manner as other + * per-packet errors. + */ +#define QIB_E_SDMA_VL15 ERR_MASK(SDmaVL15Err) +#define QIB_E_SDMA_WRONG_PORT ERR_MASK(SDmaWrongPortErr) +#define QIB_E_SDMA_BUF_DUP ERR_MASK(SDmaBufMaskDuplicateErr) + +/* + * Below functionally equivalent to legacy QLOGIC_IB_E_PKTERRS + * it is used to print "common" packet errors. + */ +#define QIB_E_P_PKTERRS (QIB_E_P_SPKTLEN |\ + QIB_E_P_SDROP_DATA | QIB_E_P_RVCRC |\ + QIB_E_P_RICRC | QIB_E_P_RSHORTPKTLEN |\ + QIB_E_P_VL15_BUF_MISUSE | QIB_E_P_SHDR | \ + QIB_E_P_REBP) + +/* Error Bits that Packet-related (Receive, per-port) */ +#define QIB_E_P_RPKTERRS (\ + QIB_E_P_RHDRLEN | QIB_E_P_RBADTID | \ + QIB_E_P_RBADVERSION | QIB_E_P_RHDR | \ + QIB_E_P_RLONGPKTLEN | QIB_E_P_RSHORTPKTLEN |\ + QIB_E_P_RMAXPKTLEN | QIB_E_P_RMINPKTLEN | \ + QIB_E_P_RFORMATERR | QIB_E_P_RUNSUPVL | \ + QIB_E_P_RUNEXPCHAR | QIB_E_P_RIBFLOW | QIB_E_P_REBP) + +/* + * Error bits that are Send-related (per port) + * (ARMLAUNCH excluded from E_SPKTERRS because it gets special handling). + * All of these potentially need to have a buffer disarmed + */ +#define QIB_E_P_SPKTERRS (\ + QIB_E_P_SUNEXP_PKTNUM |\ + QIB_E_P_SDROP_DATA | QIB_E_P_SDROP_SMP |\ + QIB_E_P_SMAXPKTLEN |\ + QIB_E_P_VL15_BUF_MISUSE | QIB_E_P_SHDR | \ + QIB_E_P_SMINPKTLEN | QIB_E_P_SPKTLEN | \ + QIB_E_P_SND_BUF_MISUSE | QIB_E_P_SUNSUPVL) + +#define QIB_E_SPKTERRS ( \ + QIB_E_SBUF_VL15_MISUSE | QIB_E_VLMISMATCH | \ + ERR_MASK_N(SendUnsupportedVLErr) | \ + QIB_E_SPCLTRIG | QIB_E_SDMA_VL15 | QIB_E_SDMA_WRONG_PORT) + +#define QIB_E_P_SDMAERRS ( \ + QIB_E_P_SDMAHALT | \ + QIB_E_P_SDMADESCADDRMISALIGN | \ + QIB_E_P_SDMAUNEXPDATA | \ + QIB_E_P_SDMAMISSINGDW | \ + QIB_E_P_SDMADWEN | \ + QIB_E_P_SDMARPYTAG | \ + QIB_E_P_SDMA1STDESC | \ + QIB_E_P_SDMABASE | \ + QIB_E_P_SDMATAILOUTOFBOUND | \ + QIB_E_P_SDMAOUTOFBOUND | \ + QIB_E_P_SDMAGENMISMATCH) + +/* + * This sets some bits more than once, but makes it more obvious which + * bits are not handled under other categories, and the repeat definition + * is not a problem. + */ +#define QIB_E_P_BITSEXTANT ( \ + QIB_E_P_SPKTERRS | QIB_E_P_PKTERRS | QIB_E_P_RPKTERRS | \ + QIB_E_P_RIBLOSTLINK | QIB_E_P_IBSTATUSCHANGED | \ + QIB_E_P_SND_BUF_MISUSE | QIB_E_P_SUNDERRUN | \ + QIB_E_P_SHDR | QIB_E_P_VL15_BUF_MISUSE | QIB_E_P_SDMAERRS \ + ) + +/* + * These are errors that can occur when the link + * changes state while a packet is being sent or received. This doesn't + * cover things like EBP or VCRC that can be the result of a sending + * having the link change state, so we receive a "known bad" packet. + * All of these are "per port", so renamed: + */ +#define QIB_E_P_LINK_PKTERRS (\ + QIB_E_P_SDROP_DATA | QIB_E_P_SDROP_SMP |\ + QIB_E_P_SMINPKTLEN | QIB_E_P_SPKTLEN |\ + QIB_E_P_RSHORTPKTLEN | QIB_E_P_RMINPKTLEN |\ + QIB_E_P_RUNEXPCHAR) + +/* + * This sets some bits more than once, but makes it more obvious which + * bits are not handled under other categories (such as QIB_E_SPKTERRS), + * and the repeat definition is not a problem. + */ +#define QIB_E_C_BITSEXTANT (\ + QIB_E_HARDWARE | QIB_E_INVALIDADDR | QIB_E_BADEEP |\ + QIB_E_ARMLAUNCH | QIB_E_VLMISMATCH | QIB_E_RRCVHDRFULL |\ + QIB_E_RRCVEGRFULL | QIB_E_RESET | QIB_E_SBUF_VL15_MISUSE) + +/* Likewise Neuter E_SPKT_ERRS_IGNORE */ +#define E_SPKT_ERRS_IGNORE 0 + +#define QIB_EXTS_MEMBIST_DISABLED \ + SYM_MASK(EXTStatus, MemBISTDisabled) +#define QIB_EXTS_MEMBIST_ENDTEST \ + SYM_MASK(EXTStatus, MemBISTEndTest) + +#define QIB_E_SPIOARMLAUNCH \ + ERR_MASK(SendArmLaunchErr) + +#define IBA7322_IBCC_LINKINITCMD_MASK SYM_RMASK(IBCCtrlA_0, LinkInitCmd) +#define IBA7322_IBCC_LINKCMD_SHIFT SYM_LSB(IBCCtrlA_0, LinkCmd) + +/* + * IBTA_1_2 is set when multiple speeds are enabled (normal), + * and also if forced QDR (only QDR enabled). It's enabled for the + * forced QDR case so that scrambling will be enabled by the TS3 + * exchange, when supported by both sides of the link. + */ +#define IBA7322_IBC_IBTA_1_2_MASK SYM_MASK(IBCCtrlB_0, IB_ENHANCED_MODE) +#define IBA7322_IBC_MAX_SPEED_MASK SYM_MASK(IBCCtrlB_0, SD_SPEED) +#define IBA7322_IBC_SPEED_QDR SYM_MASK(IBCCtrlB_0, SD_SPEED_QDR) +#define IBA7322_IBC_SPEED_DDR SYM_MASK(IBCCtrlB_0, SD_SPEED_DDR) +#define IBA7322_IBC_SPEED_SDR SYM_MASK(IBCCtrlB_0, SD_SPEED_SDR) +#define IBA7322_IBC_SPEED_MASK (SYM_MASK(IBCCtrlB_0, SD_SPEED_SDR) | \ + SYM_MASK(IBCCtrlB_0, SD_SPEED_DDR) | SYM_MASK(IBCCtrlB_0, SD_SPEED_QDR)) +#define IBA7322_IBC_SPEED_LSB SYM_LSB(IBCCtrlB_0, SD_SPEED_SDR) + +#define IBA7322_LEDBLINK_OFF_SHIFT SYM_LSB(RcvPktLEDCnt_0, OFFperiod) +#define IBA7322_LEDBLINK_ON_SHIFT SYM_LSB(RcvPktLEDCnt_0, ONperiod) + +#define IBA7322_IBC_WIDTH_AUTONEG SYM_MASK(IBCCtrlB_0, IB_NUM_CHANNELS) +#define IBA7322_IBC_WIDTH_4X_ONLY (1<> \ + SYM_LSB(IBCCtrlB_0, HRTBT_ENB)) +#define IBA7322_IBC_HRTBT_LSB SYM_LSB(IBCCtrlB_0, HRTBT_ENB) + +#define IBA7322_REDIRECT_VEC_PER_REG 12 + +#define IBA7322_SENDCHK_PKEY SYM_MASK(SendCheckControl_0, PKey_En) +#define IBA7322_SENDCHK_BTHQP SYM_MASK(SendCheckControl_0, BTHQP_En) +#define IBA7322_SENDCHK_SLID SYM_MASK(SendCheckControl_0, SLID_En) +#define IBA7322_SENDCHK_RAW_IPV6 SYM_MASK(SendCheckControl_0, RawIPV6_En) +#define IBA7322_SENDCHK_MINSZ SYM_MASK(SendCheckControl_0, PacketTooSmall_En) + +#define AUTONEG_TRIES 3 /* sequential retries to negotiate DDR */ + +#define HWE_AUTO(fldname) { .mask = SYM_MASK(HwErrMask, fldname##Mask), \ + .msg = #fldname , .sz = sizeof(#fldname) } +#define HWE_AUTO_P(fldname, port) { .mask = SYM_MASK(HwErrMask, \ + fldname##Mask##_##port), .msg = #fldname , .sz = sizeof(#fldname) } +static const struct qib_hwerror_msgs qib_7322_hwerror_msgs[] = { + HWE_AUTO_P(IBSerdesPClkNotDetect, 1), + HWE_AUTO_P(IBSerdesPClkNotDetect, 0), + HWE_AUTO(PCIESerdesPClkNotDetect), + HWE_AUTO(PowerOnBISTFailed), + HWE_AUTO(TempsenseTholdReached), + HWE_AUTO(MemoryErr), + HWE_AUTO(PCIeBusParityErr), + HWE_AUTO(PcieCplTimeout), + HWE_AUTO(PciePoisonedTLP), + HWE_AUTO_P(SDmaMemReadErr, 1), + HWE_AUTO_P(SDmaMemReadErr, 0), + HWE_AUTO_P(IBCBusFromSPCParityErr, 1), + HWE_AUTO_P(IBCBusToSPCParityErr, 1), + HWE_AUTO_P(IBCBusFromSPCParityErr, 0), + HWE_AUTO(statusValidNoEop), + HWE_AUTO(LATriggered), + { .mask = 0, .sz = 0 } +}; + +#define E_AUTO(fldname) { .mask = SYM_MASK(ErrMask, fldname##Mask), \ + .msg = #fldname, .sz = sizeof(#fldname) } +#define E_P_AUTO(fldname) { .mask = SYM_MASK(ErrMask_0, fldname##Mask), \ + .msg = #fldname, .sz = sizeof(#fldname) } +static const struct qib_hwerror_msgs qib_7322error_msgs[] = { + E_AUTO(RcvEgrFullErr), + E_AUTO(RcvHdrFullErr), + E_AUTO(ResetNegated), + E_AUTO(HardwareErr), + E_AUTO(InvalidAddrErr), + E_AUTO(SDmaVL15Err), + E_AUTO(SBufVL15MisUseErr), + E_AUTO(InvalidEEPCmd), + E_AUTO(RcvContextShareErr), + E_AUTO(SendVLMismatchErr), + E_AUTO(SendArmLaunchErr), + E_AUTO(SendSpecialTriggerErr), + E_AUTO(SDmaWrongPortErr), + E_AUTO(SDmaBufMaskDuplicateErr), + { .mask = 0, .sz = 0 } +}; + +static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = { + E_P_AUTO(IBStatusChanged), + E_P_AUTO(SHeadersErr), + E_P_AUTO(VL15BufMisuseErr), + /* + * SDmaHaltErr is not really an error, make it clearer; + */ + {.mask = SYM_MASK(ErrMask_0, SDmaHaltErrMask), .msg = "SDmaHalted", + .sz = 11}, + E_P_AUTO(SDmaDescAddrMisalignErr), + E_P_AUTO(SDmaUnexpDataErr), + E_P_AUTO(SDmaMissingDwErr), + E_P_AUTO(SDmaDwEnErr), + E_P_AUTO(SDmaRpyTagErr), + E_P_AUTO(SDma1stDescErr), + E_P_AUTO(SDmaBaseErr), + E_P_AUTO(SDmaTailOutOfBoundErr), + E_P_AUTO(SDmaOutOfBoundErr), + E_P_AUTO(SDmaGenMismatchErr), + E_P_AUTO(SendBufMisuseErr), + E_P_AUTO(SendUnsupportedVLErr), + E_P_AUTO(SendUnexpectedPktNumErr), + E_P_AUTO(SendDroppedDataPktErr), + E_P_AUTO(SendDroppedSmpPktErr), + E_P_AUTO(SendPktLenErr), + E_P_AUTO(SendUnderRunErr), + E_P_AUTO(SendMaxPktLenErr), + E_P_AUTO(SendMinPktLenErr), + E_P_AUTO(RcvIBLostLinkErr), + E_P_AUTO(RcvHdrErr), + E_P_AUTO(RcvHdrLenErr), + E_P_AUTO(RcvBadTidErr), + E_P_AUTO(RcvBadVersionErr), + E_P_AUTO(RcvIBFlowErr), + E_P_AUTO(RcvEBPErr), + E_P_AUTO(RcvUnsupportedVLErr), + E_P_AUTO(RcvUnexpectedCharErr), + E_P_AUTO(RcvShortPktLenErr), + E_P_AUTO(RcvLongPktLenErr), + E_P_AUTO(RcvMaxPktLenErr), + E_P_AUTO(RcvMinPktLenErr), + E_P_AUTO(RcvICRCErr), + E_P_AUTO(RcvVCRCErr), + E_P_AUTO(RcvFormatErr), + { .mask = 0, .sz = 0 } +}; + +/* + * Below generates "auto-message" for interrupts not specific to any port or + * context + */ +#define INTR_AUTO(fldname) { .mask = SYM_MASK(IntMask, fldname##Mask), \ + .msg = #fldname, .sz = sizeof(#fldname) } +/* Below generates "auto-message" for interrupts specific to a port */ +#define INTR_AUTO_P(fldname) { .mask = MASK_ACROSS(\ + SYM_LSB(IntMask, fldname##Mask##_0), \ + SYM_LSB(IntMask, fldname##Mask##_1)), \ + .msg = #fldname "_P", .sz = sizeof(#fldname "_P") } +/* For some reason, the SerDesTrimDone bits are reversed */ +#define INTR_AUTO_PI(fldname) { .mask = MASK_ACROSS(\ + SYM_LSB(IntMask, fldname##Mask##_1), \ + SYM_LSB(IntMask, fldname##Mask##_0)), \ + .msg = #fldname "_P", .sz = sizeof(#fldname "_P") } +/* + * Below generates "auto-message" for interrupts specific to a context, + * with ctxt-number appended + */ +#define INTR_AUTO_C(fldname) { .mask = MASK_ACROSS(\ + SYM_LSB(IntMask, fldname##0IntMask), \ + SYM_LSB(IntMask, fldname##17IntMask)), \ + .msg = #fldname "_C", .sz = sizeof(#fldname "_C") } + +static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = { + INTR_AUTO_P(SDmaInt), + INTR_AUTO_P(SDmaProgressInt), + INTR_AUTO_P(SDmaIdleInt), + INTR_AUTO_P(SDmaCleanupDone), + INTR_AUTO_C(RcvUrg), + INTR_AUTO_P(ErrInt), + INTR_AUTO(ErrInt), /* non-port-specific errs */ + INTR_AUTO(AssertGPIOInt), + INTR_AUTO_P(SendDoneInt), + INTR_AUTO(SendBufAvailInt), + INTR_AUTO_C(RcvAvail), + { .mask = 0, .sz = 0 } +}; + +#define TXSYMPTOM_AUTO_P(fldname) \ + { .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), \ + .msg = #fldname, .sz = sizeof(#fldname) } +static const struct qib_hwerror_msgs hdrchk_msgs[] = { + TXSYMPTOM_AUTO_P(NonKeyPacket), + TXSYMPTOM_AUTO_P(GRHFail), + TXSYMPTOM_AUTO_P(PkeyFail), + TXSYMPTOM_AUTO_P(QPFail), + TXSYMPTOM_AUTO_P(SLIDFail), + TXSYMPTOM_AUTO_P(RawIPV6), + TXSYMPTOM_AUTO_P(PacketTooSmall), + { .mask = 0, .sz = 0 } +}; + +#define IBA7322_HDRHEAD_PKTINT_SHIFT 32 /* interrupt cnt in upper 32 bits */ + +/* + * Called when we might have an error that is specific to a particular + * PIO buffer, and may need to cancel that buffer, so it can be re-used, + * because we don't need to force the update of pioavail + */ +static void qib_disarm_7322_senderrbufs(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + u32 i; + int any; + u32 piobcnt = dd->piobcnt2k + dd->piobcnt4k + NUM_VL15_BUFS; + u32 regcnt = (piobcnt + BITS_PER_LONG - 1) / BITS_PER_LONG; + unsigned long sbuf[4]; + + /* + * It's possible that sendbuffererror could have bits set; might + * have already done this as a result of hardware error handling. + */ + any = 0; + for (i = 0; i < regcnt; ++i) { + sbuf[i] = qib_read_kreg64(dd, kr_sendbuffererror + i); + if (sbuf[i]) { + any = 1; + qib_write_kreg(dd, kr_sendbuffererror + i, sbuf[i]); + } + } + + if (any) + qib_disarm_piobufs_set(dd, sbuf, piobcnt); +} + +/* No txe_recover yet, if ever */ + +/* No decode__errors yet */ +static void err_decode(char *msg, size_t len, u64 errs, + const struct qib_hwerror_msgs *msp) +{ + u64 these, lmask; + int took, multi, n = 0; + + while (errs && msp && msp->mask) { + multi = (msp->mask & (msp->mask - 1)); + while (errs & msp->mask) { + these = (errs & msp->mask); + lmask = (these & (these - 1)) ^ these; + if (len) { + if (n++) { + /* separate the strings */ + *msg++ = ','; + len--; + } + BUG_ON(!msp->sz); + /* msp->sz counts the nul */ + took = min_t(size_t, msp->sz - (size_t)1, len); + memcpy(msg, msp->msg, took); + len -= took; + msg += took; + if (len) + *msg = '\0'; + } + errs &= ~lmask; + if (len && multi) { + /* More than one bit this mask */ + int idx = -1; + + while (lmask & msp->mask) { + ++idx; + lmask >>= 1; + } + took = scnprintf(msg, len, "_%d", idx); + len -= took; + msg += took; + } + } + ++msp; + } + /* If some bits are left, show in hex. */ + if (len && errs) + snprintf(msg, len, "%sMORE:%llX", n ? "," : "", + (unsigned long long) errs); +} + +/* only called if r1 set */ +static void flush_fifo(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + u32 __iomem *piobuf; + u32 bufn; + u32 *hdr; + u64 pbc; + const unsigned hdrwords = 7; + static struct qib_ib_header ibhdr = { + .lrh[0] = cpu_to_be16(0xF000 | QIB_LRH_BTH), + .lrh[1] = IB_LID_PERMISSIVE, + .lrh[2] = cpu_to_be16(hdrwords + SIZE_OF_CRC), + .lrh[3] = IB_LID_PERMISSIVE, + .u.oth.bth[0] = cpu_to_be32( + (IB_OPCODE_UD_SEND_ONLY << 24) | QIB_DEFAULT_P_KEY), + .u.oth.bth[1] = cpu_to_be32(0), + .u.oth.bth[2] = cpu_to_be32(0), + .u.oth.u.ud.deth[0] = cpu_to_be32(0), + .u.oth.u.ud.deth[1] = cpu_to_be32(0), + }; + + /* + * Send a dummy VL15 packet to flush the launch FIFO. + * This will not actually be sent since the TxeBypassIbc bit is set. + */ + pbc = PBC_7322_VL15_SEND | + (((u64)ppd->hw_pidx) << (PBC_PORT_SEL_LSB + 32)) | + (hdrwords + SIZE_OF_CRC); + piobuf = qib_7322_getsendbuf(ppd, pbc, &bufn); + if (!piobuf) + return; + writeq(pbc, piobuf); + hdr = (u32 *) &ibhdr; + if (dd->flags & QIB_PIO_FLUSH_WC) { + qib_flush_wc(); + qib_pio_copy(piobuf + 2, hdr, hdrwords - 1); + qib_flush_wc(); + __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords + 1); + qib_flush_wc(); + } else + qib_pio_copy(piobuf + 2, hdr, hdrwords); + qib_sendbuf_done(dd, bufn); +} + +/* + * This is called with interrupts disabled and sdma_lock held. + */ +static void qib_7322_sdma_sendctrl(struct qib_pportdata *ppd, unsigned op) +{ + struct qib_devdata *dd = ppd->dd; + u64 set_sendctrl = 0; + u64 clr_sendctrl = 0; + + if (op & QIB_SDMA_SENDCTRL_OP_ENABLE) + set_sendctrl |= SYM_MASK(SendCtrl_0, SDmaEnable); + else + clr_sendctrl |= SYM_MASK(SendCtrl_0, SDmaEnable); + + if (op & QIB_SDMA_SENDCTRL_OP_INTENABLE) + set_sendctrl |= SYM_MASK(SendCtrl_0, SDmaIntEnable); + else + clr_sendctrl |= SYM_MASK(SendCtrl_0, SDmaIntEnable); + + if (op & QIB_SDMA_SENDCTRL_OP_HALT) + set_sendctrl |= SYM_MASK(SendCtrl_0, SDmaHalt); + else + clr_sendctrl |= SYM_MASK(SendCtrl_0, SDmaHalt); + + if (op & QIB_SDMA_SENDCTRL_OP_DRAIN) + set_sendctrl |= SYM_MASK(SendCtrl_0, TxeBypassIbc) | + SYM_MASK(SendCtrl_0, TxeAbortIbc) | + SYM_MASK(SendCtrl_0, TxeDrainRmFifo); + else + clr_sendctrl |= SYM_MASK(SendCtrl_0, TxeBypassIbc) | + SYM_MASK(SendCtrl_0, TxeAbortIbc) | + SYM_MASK(SendCtrl_0, TxeDrainRmFifo); + + spin_lock(&dd->sendctrl_lock); + + /* If we are draining everything, block sends first */ + if (op & QIB_SDMA_SENDCTRL_OP_DRAIN) { + ppd->p_sendctrl &= ~SYM_MASK(SendCtrl_0, SendEnable); + qib_write_kreg_port(ppd, krp_sendctrl, ppd->p_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + ppd->p_sendctrl |= set_sendctrl; + ppd->p_sendctrl &= ~clr_sendctrl; + + if (op & QIB_SDMA_SENDCTRL_OP_CLEANUP) + qib_write_kreg_port(ppd, krp_sendctrl, + ppd->p_sendctrl | + SYM_MASK(SendCtrl_0, SDmaCleanup)); + else + qib_write_kreg_port(ppd, krp_sendctrl, ppd->p_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + + if (op & QIB_SDMA_SENDCTRL_OP_DRAIN) { + ppd->p_sendctrl |= SYM_MASK(SendCtrl_0, SendEnable); + qib_write_kreg_port(ppd, krp_sendctrl, ppd->p_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + spin_unlock(&dd->sendctrl_lock); + + if ((op & QIB_SDMA_SENDCTRL_OP_DRAIN) && ppd->dd->cspec->r1) + flush_fifo(ppd); +} + +static void qib_7322_sdma_hw_clean_up(struct qib_pportdata *ppd) +{ + __qib_sdma_process_event(ppd, qib_sdma_event_e50_hw_cleaned); +} + +static void qib_sdma_7322_setlengen(struct qib_pportdata *ppd) +{ + /* + * Set SendDmaLenGen and clear and set + * the MSB of the generation count to enable generation checking + * and load the internal generation counter. + */ + qib_write_kreg_port(ppd, krp_senddmalengen, ppd->sdma_descq_cnt); + qib_write_kreg_port(ppd, krp_senddmalengen, + ppd->sdma_descq_cnt | + (1ULL << QIB_7322_SendDmaLenGen_0_Generation_MSB)); +} + +/* + * Must be called with sdma_lock held, or before init finished. + */ +static void qib_sdma_update_7322_tail(struct qib_pportdata *ppd, u16 tail) +{ + /* Commit writes to memory and advance the tail on the chip */ + wmb(); + ppd->sdma_descq_tail = tail; + qib_write_kreg_port(ppd, krp_senddmatail, tail); +} + +/* + * This is called with interrupts disabled and sdma_lock held. + */ +static void qib_7322_sdma_hw_start_up(struct qib_pportdata *ppd) +{ + /* + * Drain all FIFOs. + * The hardware doesn't require this but we do it so that verbs + * and user applications don't wait for link active to send stale + * data. + */ + sendctrl_7322_mod(ppd, QIB_SENDCTRL_FLUSH); + + qib_sdma_7322_setlengen(ppd); + qib_sdma_update_7322_tail(ppd, 0); /* Set SendDmaTail */ + ppd->sdma_head_dma[0] = 0; + qib_7322_sdma_sendctrl(ppd, + ppd->sdma_state.current_op | QIB_SDMA_SENDCTRL_OP_CLEANUP); +} + +#define DISABLES_SDMA ( \ + QIB_E_P_SDMAHALT | \ + QIB_E_P_SDMADESCADDRMISALIGN | \ + QIB_E_P_SDMAMISSINGDW | \ + QIB_E_P_SDMADWEN | \ + QIB_E_P_SDMARPYTAG | \ + QIB_E_P_SDMA1STDESC | \ + QIB_E_P_SDMABASE | \ + QIB_E_P_SDMATAILOUTOFBOUND | \ + QIB_E_P_SDMAOUTOFBOUND | \ + QIB_E_P_SDMAGENMISMATCH) + +static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs) +{ + unsigned long flags; + struct qib_devdata *dd = ppd->dd; + + errs &= QIB_E_P_SDMAERRS; + + if (errs & QIB_E_P_SDMAUNEXPDATA) + qib_dev_err(dd, "IB%u:%u SDmaUnexpData\n", dd->unit, + ppd->port); + + spin_lock_irqsave(&ppd->sdma_lock, flags); + + switch (ppd->sdma_state.current_state) { + case qib_sdma_state_s00_hw_down: + break; + + case qib_sdma_state_s10_hw_start_up_wait: + if (errs & QIB_E_P_SDMAHALT) + __qib_sdma_process_event(ppd, + qib_sdma_event_e20_hw_started); + break; + + case qib_sdma_state_s20_idle: + break; + + case qib_sdma_state_s30_sw_clean_up_wait: + break; + + case qib_sdma_state_s40_hw_clean_up_wait: + if (errs & QIB_E_P_SDMAHALT) + __qib_sdma_process_event(ppd, + qib_sdma_event_e50_hw_cleaned); + break; + + case qib_sdma_state_s50_hw_halt_wait: + if (errs & QIB_E_P_SDMAHALT) + __qib_sdma_process_event(ppd, + qib_sdma_event_e60_hw_halted); + break; + + case qib_sdma_state_s99_running: + __qib_sdma_process_event(ppd, qib_sdma_event_e7322_err_halted); + __qib_sdma_process_event(ppd, qib_sdma_event_e60_hw_halted); + break; + } + + spin_unlock_irqrestore(&ppd->sdma_lock, flags); +} + +/* + * handle per-device errors (not per-port errors) + */ +static noinline void handle_7322_errors(struct qib_devdata *dd) +{ + char *msg; + u64 iserr = 0; + u64 errs; + u64 mask; + int log_idx; + + qib_stats.sps_errints++; + errs = qib_read_kreg64(dd, kr_errstatus); + if (!errs) { + qib_devinfo(dd->pcidev, "device error interrupt, " + "but no error bits set!\n"); + goto done; + } + + /* don't report errors that are masked */ + errs &= dd->cspec->errormask; + msg = dd->cspec->emsgbuf; + + /* do these first, they are most important */ + if (errs & QIB_E_HARDWARE) { + *msg = '\0'; + qib_7322_handle_hwerrors(dd, msg, sizeof dd->cspec->emsgbuf); + } else + for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx) + if (errs & dd->eep_st_masks[log_idx].errs_to_log) + qib_inc_eeprom_err(dd, log_idx, 1); + + if (errs & QIB_E_SPKTERRS) { + qib_disarm_7322_senderrbufs(dd->pport); + qib_stats.sps_txerrs++; + } else if (errs & QIB_E_INVALIDADDR) + qib_stats.sps_txerrs++; + else if (errs & QIB_E_ARMLAUNCH) { + qib_stats.sps_txerrs++; + qib_disarm_7322_senderrbufs(dd->pport); + } + qib_write_kreg(dd, kr_errclear, errs); + + /* + * The ones we mask off are handled specially below + * or above. Also mask SDMADISABLED by default as it + * is too chatty. + */ + mask = QIB_E_HARDWARE; + *msg = '\0'; + + err_decode(msg, sizeof dd->cspec->emsgbuf, errs & ~mask, + qib_7322error_msgs); + + /* + * Getting reset is a tragedy for all ports. Mark the device + * _and_ the ports as "offline" in way meaningful to each. + */ + if (errs & QIB_E_RESET) { + int pidx; + + qib_dev_err(dd, "Got reset, requires re-init " + "(unload and reload driver)\n"); + dd->flags &= ~QIB_INITTED; /* needs re-init */ + /* mark as having had error */ + *dd->devstatusp |= QIB_STATUS_HWERROR; + for (pidx = 0; pidx < dd->num_pports; ++pidx) + if (dd->pport[pidx].link_speed_supported) + *dd->pport[pidx].statusp &= ~QIB_STATUS_IB_CONF; + } + + if (*msg && iserr) + qib_dev_err(dd, "%s error\n", msg); + + /* + * If there were hdrq or egrfull errors, wake up any processes + * waiting in poll. We used to try to check which contexts had + * the overflow, but given the cost of that and the chip reads + * to support it, it's better to just wake everybody up if we + * get an overflow; waiters can poll again if it's not them. + */ + if (errs & (ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr))) { + qib_handle_urcv(dd, ~0U); + if (errs & ERR_MASK(RcvEgrFullErr)) + qib_stats.sps_buffull++; + else + qib_stats.sps_hdrfull++; + } + +done: + return; +} + +static void qib_error_tasklet(unsigned long data) +{ + struct qib_devdata *dd = (struct qib_devdata *)data; + + handle_7322_errors(dd); + qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); +} + +static void reenable_chase(unsigned long opaque) +{ + struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + + ppd->cpspec->chase_timer.expires = 0; + qib_set_ib_7322_lstate(ppd, QLOGIC_IB_IBCC_LINKCMD_DOWN, + QLOGIC_IB_IBCC_LINKINITCMD_POLL); +} + +static void disable_chase(struct qib_pportdata *ppd, unsigned long tnow, + u8 ibclt) +{ + ppd->cpspec->chase_end = 0; + + if (!qib_chase) + return; + + qib_set_ib_7322_lstate(ppd, QLOGIC_IB_IBCC_LINKCMD_DOWN, + QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + ppd->cpspec->chase_timer.expires = jiffies + QIB_CHASE_DIS_TIME; + add_timer(&ppd->cpspec->chase_timer); +} + +static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst) +{ + u8 ibclt; + unsigned long tnow; + + ibclt = (u8)SYM_FIELD(ibcst, IBCStatusA_0, LinkTrainingState); + + /* + * Detect and handle the state chase issue, where we can + * get stuck if we are unlucky on timing on both sides of + * the link. If we are, we disable, set a timer, and + * then re-enable. + */ + switch (ibclt) { + case IB_7322_LT_STATE_CFGRCVFCFG: + case IB_7322_LT_STATE_CFGWAITRMT: + case IB_7322_LT_STATE_TXREVLANES: + case IB_7322_LT_STATE_CFGENH: + tnow = jiffies; + if (ppd->cpspec->chase_end && + time_after(tnow, ppd->cpspec->chase_end)) + disable_chase(ppd, tnow, ibclt); + else if (!ppd->cpspec->chase_end) + ppd->cpspec->chase_end = tnow + QIB_CHASE_TIME; + break; + default: + ppd->cpspec->chase_end = 0; + break; + } + + if (((ibclt >= IB_7322_LT_STATE_CFGTEST && + ibclt <= IB_7322_LT_STATE_CFGWAITENH) || + ibclt == IB_7322_LT_STATE_LINKUP) && + (ibcst & SYM_MASK(IBCStatusA_0, LinkSpeedQDR))) { + force_h1(ppd); + ppd->cpspec->qdr_reforce = 1; + if (!ppd->dd->cspec->r1) + serdes_7322_los_enable(ppd, 0); + } else if (ppd->cpspec->qdr_reforce && + (ibcst & SYM_MASK(IBCStatusA_0, LinkSpeedQDR)) && + (ibclt == IB_7322_LT_STATE_CFGENH || + ibclt == IB_7322_LT_STATE_CFGIDLE || + ibclt == IB_7322_LT_STATE_LINKUP)) + force_h1(ppd); + + if ((IS_QMH(ppd->dd) || IS_QME(ppd->dd)) && + ppd->link_speed_enabled == QIB_IB_QDR && + (ibclt == IB_7322_LT_STATE_CFGTEST || + ibclt == IB_7322_LT_STATE_CFGENH || + (ibclt >= IB_7322_LT_STATE_POLLACTIVE && + ibclt <= IB_7322_LT_STATE_SLEEPQUIET))) + adj_tx_serdes(ppd); + + if (ibclt != IB_7322_LT_STATE_LINKUP) { + u8 ltstate = qib_7322_phys_portstate(ibcst); + u8 pibclt = (u8)SYM_FIELD(ppd->lastibcstat, IBCStatusA_0, + LinkTrainingState); + if (!ppd->dd->cspec->r1 && + pibclt == IB_7322_LT_STATE_LINKUP && + ltstate != IB_PHYSPORTSTATE_LINK_ERR_RECOVER && + ltstate != IB_PHYSPORTSTATE_RECOVERY_RETRAIN && + ltstate != IB_PHYSPORTSTATE_RECOVERY_WAITRMT && + ltstate != IB_PHYSPORTSTATE_RECOVERY_IDLE) + /* If the link went down (but no into recovery, + * turn LOS back on */ + serdes_7322_los_enable(ppd, 1); + if (!ppd->cpspec->qdr_dfe_on && + ibclt <= IB_7322_LT_STATE_SLEEPQUIET) { + ppd->cpspec->qdr_dfe_on = 1; + ppd->cpspec->qdr_dfe_time = 0; + /* On link down, reenable QDR adaptation */ + qib_write_kreg_port(ppd, krp_static_adapt_dis(2), + ppd->dd->cspec->r1 ? + QDR_STATIC_ADAPT_DOWN_R1 : + QDR_STATIC_ADAPT_DOWN); + printk(KERN_INFO QIB_DRV_NAME + " IB%u:%u re-enabled QDR adaptation " + "ibclt %x\n", ppd->dd->unit, ppd->port, ibclt); + } + } +} + +static int qib_7322_set_ib_cfg(struct qib_pportdata *, int, u32); + +/* + * This is per-pport error handling. + * will likely get it's own MSIx interrupt (one for each port, + * although just a single handler). + */ +static noinline void handle_7322_p_errors(struct qib_pportdata *ppd) +{ + char *msg; + u64 ignore_this_time = 0, iserr = 0, errs, fmask; + struct qib_devdata *dd = ppd->dd; + + /* do this as soon as possible */ + fmask = qib_read_kreg64(dd, kr_act_fmask); + if (!fmask) + check_7322_rxe_status(ppd); + + errs = qib_read_kreg_port(ppd, krp_errstatus); + if (!errs) + qib_devinfo(dd->pcidev, + "Port%d error interrupt, but no error bits set!\n", + ppd->port); + if (!fmask) + errs &= ~QIB_E_P_IBSTATUSCHANGED; + if (!errs) + goto done; + + msg = ppd->cpspec->epmsgbuf; + *msg = '\0'; + + if (errs & ~QIB_E_P_BITSEXTANT) { + err_decode(msg, sizeof ppd->cpspec->epmsgbuf, + errs & ~QIB_E_P_BITSEXTANT, qib_7322p_error_msgs); + if (!*msg) + snprintf(msg, sizeof ppd->cpspec->epmsgbuf, + "no others"); + qib_dev_porterr(dd, ppd->port, "error interrupt with unknown" + " errors 0x%016Lx set (and %s)\n", + (errs & ~QIB_E_P_BITSEXTANT), msg); + *msg = '\0'; + } + + if (errs & QIB_E_P_SHDR) { + u64 symptom; + + /* determine cause, then write to clear */ + symptom = qib_read_kreg_port(ppd, krp_sendhdrsymptom); + qib_write_kreg_port(ppd, krp_sendhdrsymptom, 0); + err_decode(msg, sizeof ppd->cpspec->epmsgbuf, symptom, + hdrchk_msgs); + *msg = '\0'; + /* senderrbuf cleared in SPKTERRS below */ + } + + if (errs & QIB_E_P_SPKTERRS) { + if ((errs & QIB_E_P_LINK_PKTERRS) && + !(ppd->lflags & QIBL_LINKACTIVE)) { + /* + * This can happen when trying to bring the link + * up, but the IB link changes state at the "wrong" + * time. The IB logic then complains that the packet + * isn't valid. We don't want to confuse people, so + * we just don't print them, except at debug + */ + err_decode(msg, sizeof ppd->cpspec->epmsgbuf, + (errs & QIB_E_P_LINK_PKTERRS), + qib_7322p_error_msgs); + *msg = '\0'; + ignore_this_time = errs & QIB_E_P_LINK_PKTERRS; + } + qib_disarm_7322_senderrbufs(ppd); + } else if ((errs & QIB_E_P_LINK_PKTERRS) && + !(ppd->lflags & QIBL_LINKACTIVE)) { + /* + * This can happen when SMA is trying to bring the link + * up, but the IB link changes state at the "wrong" time. + * The IB logic then complains that the packet isn't + * valid. We don't want to confuse people, so we just + * don't print them, except at debug + */ + err_decode(msg, sizeof ppd->cpspec->epmsgbuf, errs, + qib_7322p_error_msgs); + ignore_this_time = errs & QIB_E_P_LINK_PKTERRS; + *msg = '\0'; + } + + qib_write_kreg_port(ppd, krp_errclear, errs); + + errs &= ~ignore_this_time; + if (!errs) + goto done; + + if (errs & QIB_E_P_RPKTERRS) + qib_stats.sps_rcverrs++; + if (errs & QIB_E_P_SPKTERRS) + qib_stats.sps_txerrs++; + + iserr = errs & ~(QIB_E_P_RPKTERRS | QIB_E_P_PKTERRS); + + if (errs & QIB_E_P_SDMAERRS) + sdma_7322_p_errors(ppd, errs); + + if (errs & QIB_E_P_IBSTATUSCHANGED) { + u64 ibcs; + u8 ltstate; + + ibcs = qib_read_kreg_port(ppd, krp_ibcstatus_a); + ltstate = qib_7322_phys_portstate(ibcs); + + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) + handle_serdes_issues(ppd, ibcs); + if (!(ppd->cpspec->ibcctrl_a & + SYM_MASK(IBCCtrlA_0, IBStatIntReductionEn))) { + /* + * We got our interrupt, so init code should be + * happy and not try alternatives. Now squelch + * other "chatter" from link-negotiation (pre Init) + */ + ppd->cpspec->ibcctrl_a |= + SYM_MASK(IBCCtrlA_0, IBStatIntReductionEn); + qib_write_kreg_port(ppd, krp_ibcctrl_a, + ppd->cpspec->ibcctrl_a); + } + + /* Update our picture of width and speed from chip */ + ppd->link_width_active = + (ibcs & SYM_MASK(IBCStatusA_0, LinkWidthActive)) ? + IB_WIDTH_4X : IB_WIDTH_1X; + ppd->link_speed_active = (ibcs & SYM_MASK(IBCStatusA_0, + LinkSpeedQDR)) ? QIB_IB_QDR : (ibcs & + SYM_MASK(IBCStatusA_0, LinkSpeedActive)) ? + QIB_IB_DDR : QIB_IB_SDR; + + if ((ppd->lflags & QIBL_IB_LINK_DISABLED) && ltstate != + IB_PHYSPORTSTATE_DISABLED) + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + else + /* + * Since going into a recovery state causes the link + * state to go down and since recovery is transitory, + * it is better if we "miss" ever seeing the link + * training state go into recovery (i.e., ignore this + * transition for link state special handling purposes) + * without updating lastibcstat. + */ + if (ltstate != IB_PHYSPORTSTATE_LINK_ERR_RECOVER && + ltstate != IB_PHYSPORTSTATE_RECOVERY_RETRAIN && + ltstate != IB_PHYSPORTSTATE_RECOVERY_WAITRMT && + ltstate != IB_PHYSPORTSTATE_RECOVERY_IDLE) + qib_handle_e_ibstatuschanged(ppd, ibcs); + } + if (*msg && iserr) + qib_dev_porterr(dd, ppd->port, "%s error\n", msg); + + if (ppd->state_wanted & ppd->lflags) + wake_up_interruptible(&ppd->state_wait); +done: + return; +} + +/* enable/disable chip from delivering interrupts */ +static void qib_7322_set_intr_state(struct qib_devdata *dd, u32 enable) +{ + if (enable) { + if (dd->flags & QIB_BADINTR) + return; + qib_write_kreg(dd, kr_intmask, dd->cspec->int_enable_mask); + /* cause any pending enabled interrupts to be re-delivered */ + qib_write_kreg(dd, kr_intclear, 0ULL); + if (dd->cspec->num_msix_entries) { + /* and same for MSIx */ + u64 val = qib_read_kreg64(dd, kr_intgranted); + if (val) + qib_write_kreg(dd, kr_intgranted, val); + } + } else + qib_write_kreg(dd, kr_intmask, 0ULL); +} + +/* + * Try to cleanup as much as possible for anything that might have gone + * wrong while in freeze mode, such as pio buffers being written by user + * processes (causing armlaunch), send errors due to going into freeze mode, + * etc., and try to avoid causing extra interrupts while doing so. + * Forcibly update the in-memory pioavail register copies after cleanup + * because the chip won't do it while in freeze mode (the register values + * themselves are kept correct). + * Make sure that we don't lose any important interrupts by using the chip + * feature that says that writing 0 to a bit in *clear that is set in + * *status will cause an interrupt to be generated again (if allowed by + * the *mask value). + * This is in chip-specific code because of all of the register accesses, + * even though the details are similar on most chips. + */ +static void qib_7322_clear_freeze(struct qib_devdata *dd) +{ + int pidx; + + /* disable error interrupts, to avoid confusion */ + qib_write_kreg(dd, kr_errmask, 0ULL); + + for (pidx = 0; pidx < dd->num_pports; ++pidx) + if (dd->pport[pidx].link_speed_supported) + qib_write_kreg_port(dd->pport + pidx, krp_errmask, + 0ULL); + + /* also disable interrupts; errormask is sometimes overwriten */ + qib_7322_set_intr_state(dd, 0); + + /* clear the freeze, and be sure chip saw it */ + qib_write_kreg(dd, kr_control, dd->control); + qib_read_kreg32(dd, kr_scratch); + + /* + * Force new interrupt if any hwerr, error or interrupt bits are + * still set, and clear "safe" send packet errors related to freeze + * and cancelling sends. Re-enable error interrupts before possible + * force of re-interrupt on pending interrupts. + */ + qib_write_kreg(dd, kr_hwerrclear, 0ULL); + qib_write_kreg(dd, kr_errclear, E_SPKT_ERRS_IGNORE); + qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); + /* We need to purge per-port errs and reset mask, too */ + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + if (!dd->pport[pidx].link_speed_supported) + continue; + qib_write_kreg_port(dd->pport + pidx, krp_errclear, ~0Ull); + qib_write_kreg_port(dd->pport + pidx, krp_errmask, ~0Ull); + } + qib_7322_set_intr_state(dd, 1); +} + +/* no error handling to speak of */ +/** + * qib_7322_handle_hwerrors - display hardware errors. + * @dd: the qlogic_ib device + * @msg: the output buffer + * @msgl: the size of the output buffer + * + * Use same msg buffer as regular errors to avoid excessive stack + * use. Most hardware errors are catastrophic, but for right now, + * we'll print them and continue. We reuse the same message buffer as + * qib_handle_errors() to avoid excessive stack usage. + */ +static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, + size_t msgl) +{ + u64 hwerrs; + u32 ctrl; + int isfatal = 0; + + hwerrs = qib_read_kreg64(dd, kr_hwerrstatus); + if (!hwerrs) + goto bail; + if (hwerrs == ~0ULL) { + qib_dev_err(dd, "Read of hardware error status failed " + "(all bits set); ignoring\n"); + goto bail; + } + qib_stats.sps_hwerrs++; + + /* Always clear the error status register, except BIST fail */ + qib_write_kreg(dd, kr_hwerrclear, hwerrs & + ~HWE_MASK(PowerOnBISTFailed)); + + hwerrs &= dd->cspec->hwerrmask; + + /* no EEPROM logging, yet */ + + if (hwerrs) + qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx " + "(cleared)\n", (unsigned long long) hwerrs); + + ctrl = qib_read_kreg32(dd, kr_control); + if ((ctrl & SYM_MASK(Control, FreezeMode)) && !dd->diag_client) { + /* + * No recovery yet... + */ + if ((hwerrs & ~HWE_MASK(LATriggered)) || + dd->cspec->stay_in_freeze) { + /* + * If any set that we aren't ignoring only make the + * complaint once, in case it's stuck or recurring, + * and we get here multiple times + * Force link down, so switch knows, and + * LEDs are turned off. + */ + if (dd->flags & QIB_INITTED) + isfatal = 1; + } else + qib_7322_clear_freeze(dd); + } + + if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { + isfatal = 1; + strlcpy(msg, "[Memory BIST test failed, " + "InfiniPath hardware unusable]", msgl); + /* ignore from now on, so disable until driver reloaded */ + dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + } + + err_decode(msg, msgl, hwerrs, qib_7322_hwerror_msgs); + + /* Ignore esoteric PLL failures et al. */ + + qib_dev_err(dd, "%s hardware error\n", msg); + + if (isfatal && !dd->diag_client) { + qib_dev_err(dd, "Fatal Hardware Error, no longer" + " usable, SN %.16s\n", dd->serial); + /* + * for /sys status file and user programs to print; if no + * trailing brace is copied, we'll know it was truncated. + */ + if (dd->freezemsg) + snprintf(dd->freezemsg, dd->freezelen, + "{%s}", msg); + qib_disable_after_error(dd); + } +bail:; +} + +/** + * qib_7322_init_hwerrors - enable hardware errors + * @dd: the qlogic_ib device + * + * now that we have finished initializing everything that might reasonably + * cause a hardware error, and cleared those errors bits as they occur, + * we can enable hardware errors in the mask (potentially enabling + * freeze mode), and enable hardware errors as errors (along with + * everything else) in errormask + */ +static void qib_7322_init_hwerrors(struct qib_devdata *dd) +{ + int pidx; + u64 extsval; + + extsval = qib_read_kreg64(dd, kr_extstatus); + if (!(extsval & (QIB_EXTS_MEMBIST_DISABLED | + QIB_EXTS_MEMBIST_ENDTEST))) + qib_dev_err(dd, "MemBIST did not complete!\n"); + + /* never clear BIST failure, so reported on each driver load */ + qib_write_kreg(dd, kr_hwerrclear, ~HWE_MASK(PowerOnBISTFailed)); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + + /* clear all */ + qib_write_kreg(dd, kr_errclear, ~0ULL); + /* enable errors that are masked, at least this first time. */ + qib_write_kreg(dd, kr_errmask, ~0ULL); + dd->cspec->errormask = qib_read_kreg64(dd, kr_errmask); + for (pidx = 0; pidx < dd->num_pports; ++pidx) + if (dd->pport[pidx].link_speed_supported) + qib_write_kreg_port(dd->pport + pidx, krp_errmask, + ~0ULL); +} + +/* + * Disable and enable the armlaunch error. Used for PIO bandwidth testing + * on chips that are count-based, rather than trigger-based. There is no + * reference counting, but that's also fine, given the intended use. + * Only chip-specific because it's all register accesses + */ +static void qib_set_7322_armlaunch(struct qib_devdata *dd, u32 enable) +{ + if (enable) { + qib_write_kreg(dd, kr_errclear, QIB_E_SPIOARMLAUNCH); + dd->cspec->errormask |= QIB_E_SPIOARMLAUNCH; + } else + dd->cspec->errormask &= ~QIB_E_SPIOARMLAUNCH; + qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); +} + +/* + * Formerly took parameter in pre-shifted, + * pre-merged form with LinkCmd and LinkInitCmd + * together, and assuming the zero was NOP. + */ +static void qib_set_ib_7322_lstate(struct qib_pportdata *ppd, u16 linkcmd, + u16 linitcmd) +{ + u64 mod_wd; + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + + if (linitcmd == QLOGIC_IB_IBCC_LINKINITCMD_DISABLE) { + /* + * If we are told to disable, note that so link-recovery + * code does not attempt to bring us back up. + * Also reset everything that we can, so we start + * completely clean when re-enabled (before we + * actually issue the disable to the IBC) + */ + qib_7322_mini_pcs_reset(ppd); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_LINK_DISABLED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } else if (linitcmd || linkcmd == QLOGIC_IB_IBCC_LINKCMD_DOWN) { + /* + * Any other linkinitcmd will lead to LINKDOWN and then + * to INIT (if all is well), so clear flag to let + * link-recovery code attempt to bring us back up. + */ + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_LINK_DISABLED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + /* + * Clear status change interrupt reduction so the + * new state is seen. + */ + ppd->cpspec->ibcctrl_a &= + ~SYM_MASK(IBCCtrlA_0, IBStatIntReductionEn); + } + + mod_wd = (linkcmd << IBA7322_IBCC_LINKCMD_SHIFT) | + (linitcmd << QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + + qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a | + mod_wd); + /* write to chip to prevent back-to-back writes of ibc reg */ + qib_write_kreg(dd, kr_scratch, 0); + +} + +/* + * The total RCV buffer memory is 64KB, used for both ports, and is + * in units of 64 bytes (same as IB flow control credit unit). + * The consumedVL unit in the same registers are in 32 byte units! + * So, a VL15 packet needs 4.50 IB credits, and 9 rx buffer chunks, + * and we can therefore allocate just 9 IB credits for 2 VL15 packets + * in krp_rxcreditvl15, rather than 10. + */ +#define RCV_BUF_UNITSZ 64 +#define NUM_RCV_BUF_UNITS(dd) ((64 * 1024) / (RCV_BUF_UNITSZ * dd->num_pports)) + +static void set_vls(struct qib_pportdata *ppd) +{ + int i, numvls, totcred, cred_vl, vl0extra; + struct qib_devdata *dd = ppd->dd; + u64 val; + + numvls = qib_num_vls(ppd->vls_operational); + + /* + * Set up per-VL credits. Below is kluge based on these assumptions: + * 1) port is disabled at the time early_init is called. + * 2) give VL15 17 credits, for two max-plausible packets. + * 3) Give VL0-N the rest, with any rounding excess used for VL0 + */ + /* 2 VL15 packets @ 288 bytes each (including IB headers) */ + totcred = NUM_RCV_BUF_UNITS(dd); + cred_vl = (2 * 288 + RCV_BUF_UNITSZ - 1) / RCV_BUF_UNITSZ; + totcred -= cred_vl; + qib_write_kreg_port(ppd, krp_rxcreditvl15, (u64) cred_vl); + cred_vl = totcred / numvls; + vl0extra = totcred - cred_vl * numvls; + qib_write_kreg_port(ppd, krp_rxcreditvl0, cred_vl + vl0extra); + for (i = 1; i < numvls; i++) + qib_write_kreg_port(ppd, krp_rxcreditvl0 + i, cred_vl); + for (; i < 8; i++) /* no buffer space for other VLs */ + qib_write_kreg_port(ppd, krp_rxcreditvl0 + i, 0); + + /* Notify IBC that credits need to be recalculated */ + val = qib_read_kreg_port(ppd, krp_ibsdtestiftx); + val |= SYM_MASK(IB_SDTEST_IF_TX_0, CREDIT_CHANGE); + qib_write_kreg_port(ppd, krp_ibsdtestiftx, val); + qib_write_kreg(dd, kr_scratch, 0ULL); + val &= ~SYM_MASK(IB_SDTEST_IF_TX_0, CREDIT_CHANGE); + qib_write_kreg_port(ppd, krp_ibsdtestiftx, val); + + for (i = 0; i < numvls; i++) + val = qib_read_kreg_port(ppd, krp_rxcreditvl0 + i); + val = qib_read_kreg_port(ppd, krp_rxcreditvl15); + + /* Change the number of operational VLs */ + ppd->cpspec->ibcctrl_a = (ppd->cpspec->ibcctrl_a & + ~SYM_MASK(IBCCtrlA_0, NumVLane)) | + ((u64)(numvls - 1) << SYM_LSB(IBCCtrlA_0, NumVLane)); + qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a); + qib_write_kreg(dd, kr_scratch, 0ULL); +} + +/* + * The code that deals with actual SerDes is in serdes_7322_init(). + * Compared to the code for iba7220, it is minimal. + */ +static int serdes_7322_init(struct qib_pportdata *ppd); + +/** + * qib_7322_bringup_serdes - bring up the serdes + * @ppd: physical port on the qlogic_ib device + */ +static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + u64 val, guid, ibc; + unsigned long flags; + int ret = 0; + + /* + * SerDes model not in Pd, but still need to + * set up much of IBCCtrl and IBCDDRCtrl; move elsewhere + * eventually. + */ + /* Put IBC in reset, sends disabled (should be in reset already) */ + ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0, IBLinkEn); + qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a); + qib_write_kreg(dd, kr_scratch, 0ULL); + + if (qib_compat_ddr_negotiate) { + ppd->cpspec->ibdeltainprog = 1; + ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd, + crp_ibsymbolerr); + ppd->cpspec->iblnkerrsnap = read_7322_creg32_port(ppd, + crp_iblinkerrrecov); + } + + /* flowcontrolwatermark is in units of KBytes */ + ibc = 0x5ULL << SYM_LSB(IBCCtrlA_0, FlowCtrlWaterMark); + /* + * Flow control is sent this often, even if no changes in + * buffer space occur. Units are 128ns for this chip. + * Set to 3usec. + */ + ibc |= 24ULL << SYM_LSB(IBCCtrlA_0, FlowCtrlPeriod); + /* max error tolerance */ + ibc |= 0xfULL << SYM_LSB(IBCCtrlA_0, PhyerrThreshold); + /* IB credit flow control. */ + ibc |= 0xfULL << SYM_LSB(IBCCtrlA_0, OverrunThreshold); + /* + * set initial max size pkt IBC will send, including ICRC; it's the + * PIO buffer size in dwords, less 1; also see qib_set_mtu() + */ + ibc |= ((u64)(ppd->ibmaxlen >> 2) + 1) << + SYM_LSB(IBCCtrlA_0, MaxPktLen); + ppd->cpspec->ibcctrl_a = ibc; /* without linkcmd or linkinitcmd! */ + + /* + * Reset the PCS interface to the serdes (and also ibc, which is still + * in reset from above). Writes new value of ibcctrl_a as last step. + */ + qib_7322_mini_pcs_reset(ppd); + + if (!ppd->cpspec->ibcctrl_b) { + unsigned lse = ppd->link_speed_enabled; + + /* + * Not on re-init after reset, establish shadow + * and force initial config. + */ + ppd->cpspec->ibcctrl_b = qib_read_kreg_port(ppd, + krp_ibcctrl_b); + ppd->cpspec->ibcctrl_b &= ~(IBA7322_IBC_SPEED_QDR | + IBA7322_IBC_SPEED_DDR | + IBA7322_IBC_SPEED_SDR | + IBA7322_IBC_WIDTH_AUTONEG | + SYM_MASK(IBCCtrlB_0, IB_LANE_REV_SUPPORTED)); + if (lse & (lse - 1)) /* Muliple speeds enabled */ + ppd->cpspec->ibcctrl_b |= + (lse << IBA7322_IBC_SPEED_LSB) | + IBA7322_IBC_IBTA_1_2_MASK | + IBA7322_IBC_MAX_SPEED_MASK; + else + ppd->cpspec->ibcctrl_b |= (lse == QIB_IB_QDR) ? + IBA7322_IBC_SPEED_QDR | + IBA7322_IBC_IBTA_1_2_MASK : + (lse == QIB_IB_DDR) ? + IBA7322_IBC_SPEED_DDR : + IBA7322_IBC_SPEED_SDR; + if ((ppd->link_width_enabled & (IB_WIDTH_1X | IB_WIDTH_4X)) == + (IB_WIDTH_1X | IB_WIDTH_4X)) + ppd->cpspec->ibcctrl_b |= IBA7322_IBC_WIDTH_AUTONEG; + else + ppd->cpspec->ibcctrl_b |= + ppd->link_width_enabled == IB_WIDTH_4X ? + IBA7322_IBC_WIDTH_4X_ONLY : + IBA7322_IBC_WIDTH_1X_ONLY; + + /* always enable these on driver reload, not sticky */ + ppd->cpspec->ibcctrl_b |= (IBA7322_IBC_RXPOL_MASK | + IBA7322_IBC_HRTBT_MASK); + } + qib_write_kreg_port(ppd, krp_ibcctrl_b, ppd->cpspec->ibcctrl_b); + + /* setup so we have more time at CFGTEST to change H1 */ + val = qib_read_kreg_port(ppd, krp_ibcctrl_c); + val &= ~SYM_MASK(IBCCtrlC_0, IB_FRONT_PORCH); + val |= 0xfULL << SYM_LSB(IBCCtrlC_0, IB_FRONT_PORCH); + qib_write_kreg_port(ppd, krp_ibcctrl_c, val); + + serdes_7322_init(ppd); + + guid = be64_to_cpu(ppd->guid); + if (!guid) { + if (dd->base_guid) + guid = be64_to_cpu(dd->base_guid) + ppd->port - 1; + ppd->guid = cpu_to_be64(guid); + } + + qib_write_kreg_port(ppd, krp_hrtbt_guid, guid); + /* write to chip to prevent back-to-back writes of ibc reg */ + qib_write_kreg(dd, kr_scratch, 0); + + /* Enable port */ + ppd->cpspec->ibcctrl_a |= SYM_MASK(IBCCtrlA_0, IBLinkEn); + set_vls(ppd); + + /* initially come up DISABLED, without sending anything. */ + val = ppd->cpspec->ibcctrl_a | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE << + QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + qib_write_kreg_port(ppd, krp_ibcctrl_a, val); + qib_write_kreg(dd, kr_scratch, 0ULL); + /* clear the linkinit cmds */ + ppd->cpspec->ibcctrl_a = val & ~SYM_MASK(IBCCtrlA_0, LinkInitCmd); + + /* be paranoid against later code motion, etc. */ + spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags); + ppd->p_rcvctrl |= SYM_MASK(RcvCtrl_0, RcvIBPortEnable); + qib_write_kreg_port(ppd, krp_rcvctrl, ppd->p_rcvctrl); + spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags); + + /* Also enable IBSTATUSCHG interrupt. */ + val = qib_read_kreg_port(ppd, krp_errmask); + qib_write_kreg_port(ppd, krp_errmask, + val | ERR_MASK_N(IBStatusChanged)); + + /* Always zero until we start messing with SerDes for real */ + return ret; +} + +/** + * qib_7322_quiet_serdes - set serdes to txidle + * @dd: the qlogic_ib device + * Called when driver is being unloaded + */ +static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd) +{ + u64 val; + unsigned long flags; + + qib_set_ib_7322_lstate(ppd, 0, QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + wake_up(&ppd->cpspec->autoneg_wait); + cancel_delayed_work_sync(&ppd->cpspec->autoneg_work); + if (ppd->dd->cspec->r1) + cancel_delayed_work_sync(&ppd->cpspec->ipg_work); + + ppd->cpspec->chase_end = 0; + if (ppd->cpspec->chase_timer.data) /* if initted */ + del_timer_sync(&ppd->cpspec->chase_timer); + + /* + * Despite the name, actually disables IBC as well. Do it when + * we are as sure as possible that no more packets can be + * received, following the down and the PCS reset. + * The actual disabling happens in qib_7322_mini_pci_reset(), + * along with the PCS being reset. + */ + ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0, IBLinkEn); + qib_7322_mini_pcs_reset(ppd); + + /* + * Update the adjusted counters so the adjustment persists + * across driver reload. + */ + if (ppd->cpspec->ibsymdelta || ppd->cpspec->iblnkerrdelta || + ppd->cpspec->ibdeltainprog || ppd->cpspec->iblnkdowndelta) { + struct qib_devdata *dd = ppd->dd; + u64 diagc; + + /* enable counter writes */ + diagc = qib_read_kreg64(dd, kr_hwdiagctrl); + qib_write_kreg(dd, kr_hwdiagctrl, + diagc | SYM_MASK(HwDiagCtrl, CounterWrEnable)); + + if (ppd->cpspec->ibsymdelta || ppd->cpspec->ibdeltainprog) { + val = read_7322_creg32_port(ppd, crp_ibsymbolerr); + if (ppd->cpspec->ibdeltainprog) + val -= val - ppd->cpspec->ibsymsnap; + val -= ppd->cpspec->ibsymdelta; + write_7322_creg_port(ppd, crp_ibsymbolerr, val); + } + if (ppd->cpspec->iblnkerrdelta || ppd->cpspec->ibdeltainprog) { + val = read_7322_creg32_port(ppd, crp_iblinkerrrecov); + if (ppd->cpspec->ibdeltainprog) + val -= val - ppd->cpspec->iblnkerrsnap; + val -= ppd->cpspec->iblnkerrdelta; + write_7322_creg_port(ppd, crp_iblinkerrrecov, val); + } + if (ppd->cpspec->iblnkdowndelta) { + val = read_7322_creg32_port(ppd, crp_iblinkdown); + val += ppd->cpspec->iblnkdowndelta; + write_7322_creg_port(ppd, crp_iblinkdown, val); + } + /* + * No need to save ibmalfdelta since IB perfcounters + * are cleared on driver reload. + */ + + /* and disable counter writes */ + qib_write_kreg(dd, kr_hwdiagctrl, diagc); + } +} + +/** + * qib_setup_7322_setextled - set the state of the two external LEDs + * @ppd: physical port on the qlogic_ib device + * @on: whether the link is up or not + * + * The exact combo of LEDs if on is true is determined by looking + * at the ibcstatus. + * + * These LEDs indicate the physical and logical state of IB link. + * For this chip (at least with recommended board pinouts), LED1 + * is Yellow (logical state) and LED2 is Green (physical state), + * + * Note: We try to match the Mellanox HCA LED behavior as best + * we can. Green indicates physical link state is OK (something is + * plugged in, and we can train). + * Amber indicates the link is logically up (ACTIVE). + * Mellanox further blinks the amber LED to indicate data packet + * activity, but we have no hardware support for that, so it would + * require waking up every 10-20 msecs and checking the counters + * on the chip, and then turning the LED off if appropriate. That's + * visible overhead, so not something we will do. + */ +static void qib_setup_7322_setextled(struct qib_pportdata *ppd, u32 on) +{ + struct qib_devdata *dd = ppd->dd; + u64 extctl, ledblink = 0, val; + unsigned long flags; + int yel, grn; + + /* + * The diags use the LED to indicate diag info, so we leave + * the external LED alone when the diags are running. + */ + if (dd->diag_client) + return; + + /* Allow override of LED display for, e.g. Locating system in rack */ + if (ppd->led_override) { + grn = (ppd->led_override & QIB_LED_PHYS); + yel = (ppd->led_override & QIB_LED_LOG); + } else if (on) { + val = qib_read_kreg_port(ppd, krp_ibcstatus_a); + grn = qib_7322_phys_portstate(val) == + IB_PHYSPORTSTATE_LINKUP; + yel = qib_7322_iblink_state(val) == IB_PORT_ACTIVE; + } else { + grn = 0; + yel = 0; + } + + spin_lock_irqsave(&dd->cspec->gpio_lock, flags); + extctl = dd->cspec->extctrl & (ppd->port == 1 ? + ~ExtLED_IB1_MASK : ~ExtLED_IB2_MASK); + if (grn) { + extctl |= ppd->port == 1 ? ExtLED_IB1_GRN : ExtLED_IB2_GRN; + /* + * Counts are in chip clock (4ns) periods. + * This is 1/16 sec (66.6ms) on, + * 3/16 sec (187.5 ms) off, with packets rcvd. + */ + ledblink = ((66600 * 1000UL / 4) << IBA7322_LEDBLINK_ON_SHIFT) | + ((187500 * 1000UL / 4) << IBA7322_LEDBLINK_OFF_SHIFT); + } + if (yel) + extctl |= ppd->port == 1 ? ExtLED_IB1_YEL : ExtLED_IB2_YEL; + dd->cspec->extctrl = extctl; + qib_write_kreg(dd, kr_extctrl, dd->cspec->extctrl); + spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); + + if (ledblink) /* blink the LED on packet receive */ + qib_write_kreg_port(ppd, krp_rcvpktledcnt, ledblink); +} + +/* + * Disable MSIx interrupt if enabled, call generic MSIx code + * to cleanup, and clear pending MSIx interrupts. + * Used for fallback to INTx, after reset, and when MSIx setup fails. + */ +static void qib_7322_nomsix(struct qib_devdata *dd) +{ + u64 intgranted; + int n; + + dd->cspec->main_int_mask = ~0ULL; + n = dd->cspec->num_msix_entries; + if (n) { + int i; + + dd->cspec->num_msix_entries = 0; + for (i = 0; i < n; i++) { + irq_set_affinity_hint( + dd->cspec->msix_entries[i].msix.vector, NULL); + free_cpumask_var(dd->cspec->msix_entries[i].mask); + free_irq(dd->cspec->msix_entries[i].msix.vector, + dd->cspec->msix_entries[i].arg); + } + qib_nomsix(dd); + } + /* make sure no MSIx interrupts are left pending */ + intgranted = qib_read_kreg64(dd, kr_intgranted); + if (intgranted) + qib_write_kreg(dd, kr_intgranted, intgranted); +} + +static void qib_7322_free_irq(struct qib_devdata *dd) +{ + if (dd->cspec->irq) { + free_irq(dd->cspec->irq, dd); + dd->cspec->irq = 0; + } + qib_7322_nomsix(dd); +} + +static void qib_setup_7322_cleanup(struct qib_devdata *dd) +{ + int i; + + qib_7322_free_irq(dd); + kfree(dd->cspec->cntrs); + kfree(dd->cspec->sendchkenable); + kfree(dd->cspec->sendgrhchk); + kfree(dd->cspec->sendibchk); + kfree(dd->cspec->msix_entries); + for (i = 0; i < dd->num_pports; i++) { + unsigned long flags; + u32 mask = QSFP_GPIO_MOD_PRS_N | + (QSFP_GPIO_MOD_PRS_N << QSFP_GPIO_PORT2_SHIFT); + + kfree(dd->pport[i].cpspec->portcntrs); + if (dd->flags & QIB_HAS_QSFP) { + spin_lock_irqsave(&dd->cspec->gpio_lock, flags); + dd->cspec->gpio_mask &= ~mask; + qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask); + spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); + qib_qsfp_deinit(&dd->pport[i].cpspec->qsfp_data); + } + if (dd->pport[i].ibport_data.smi_ah) + ib_destroy_ah(&dd->pport[i].ibport_data.smi_ah->ibah); + } +} + +/* handle SDMA interrupts */ +static void sdma_7322_intr(struct qib_devdata *dd, u64 istat) +{ + struct qib_pportdata *ppd0 = &dd->pport[0]; + struct qib_pportdata *ppd1 = &dd->pport[1]; + u64 intr0 = istat & (INT_MASK_P(SDma, 0) | + INT_MASK_P(SDmaIdle, 0) | INT_MASK_P(SDmaProgress, 0)); + u64 intr1 = istat & (INT_MASK_P(SDma, 1) | + INT_MASK_P(SDmaIdle, 1) | INT_MASK_P(SDmaProgress, 1)); + + if (intr0) + qib_sdma_intr(ppd0); + if (intr1) + qib_sdma_intr(ppd1); + + if (istat & INT_MASK_PM(SDmaCleanupDone, 0)) + qib_sdma_process_event(ppd0, qib_sdma_event_e20_hw_started); + if (istat & INT_MASK_PM(SDmaCleanupDone, 1)) + qib_sdma_process_event(ppd1, qib_sdma_event_e20_hw_started); +} + +/* + * Set or clear the Send buffer available interrupt enable bit. + */ +static void qib_wantpiobuf_7322_intr(struct qib_devdata *dd, u32 needint) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->sendctrl_lock, flags); + if (needint) + dd->sendctrl |= SYM_MASK(SendCtrl, SendIntBufAvail); + else + dd->sendctrl &= ~SYM_MASK(SendCtrl, SendIntBufAvail); + qib_write_kreg(dd, kr_sendctrl, dd->sendctrl); + qib_write_kreg(dd, kr_scratch, 0ULL); + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); +} + +/* + * Somehow got an interrupt with reserved bits set in interrupt status. + * Print a message so we know it happened, then clear them. + * keep mainline interrupt handler cache-friendly + */ +static noinline void unknown_7322_ibits(struct qib_devdata *dd, u64 istat) +{ + u64 kills; + char msg[128]; + + kills = istat & ~QIB_I_BITSEXTANT; + qib_dev_err(dd, "Clearing reserved interrupt(s) 0x%016llx:" + " %s\n", (unsigned long long) kills, msg); + qib_write_kreg(dd, kr_intmask, (dd->cspec->int_enable_mask & ~kills)); +} + +/* keep mainline interrupt handler cache-friendly */ +static noinline void unknown_7322_gpio_intr(struct qib_devdata *dd) +{ + u32 gpiostatus; + int handled = 0; + int pidx; + + /* + * Boards for this chip currently don't use GPIO interrupts, + * so clear by writing GPIOstatus to GPIOclear, and complain + * to developer. To avoid endless repeats, clear + * the bits in the mask, since there is some kind of + * programming error or chip problem. + */ + gpiostatus = qib_read_kreg32(dd, kr_gpio_status); + /* + * In theory, writing GPIOstatus to GPIOclear could + * have a bad side-effect on some diagnostic that wanted + * to poll for a status-change, but the various shadows + * make that problematic at best. Diags will just suppress + * all GPIO interrupts during such tests. + */ + qib_write_kreg(dd, kr_gpio_clear, gpiostatus); + /* + * Check for QSFP MOD_PRS changes + * only works for single port if IB1 != pidx1 + */ + for (pidx = 0; pidx < dd->num_pports && (dd->flags & QIB_HAS_QSFP); + ++pidx) { + struct qib_pportdata *ppd; + struct qib_qsfp_data *qd; + u32 mask; + if (!dd->pport[pidx].link_speed_supported) + continue; + mask = QSFP_GPIO_MOD_PRS_N; + ppd = dd->pport + pidx; + mask <<= (QSFP_GPIO_PORT2_SHIFT * ppd->hw_pidx); + if (gpiostatus & dd->cspec->gpio_mask & mask) { + u64 pins; + qd = &ppd->cpspec->qsfp_data; + gpiostatus &= ~mask; + pins = qib_read_kreg64(dd, kr_extstatus); + pins >>= SYM_LSB(EXTStatus, GPIOIn); + if (!(pins & mask)) { + ++handled; + qd->t_insert = jiffies; + queue_work(ib_wq, &qd->work); + } + } + } + + if (gpiostatus && !handled) { + const u32 mask = qib_read_kreg32(dd, kr_gpio_mask); + u32 gpio_irq = mask & gpiostatus; + + /* + * Clear any troublemakers, and update chip from shadow + */ + dd->cspec->gpio_mask &= ~gpio_irq; + qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask); + } +} + +/* + * Handle errors and unusual events first, separate function + * to improve cache hits for fast path interrupt handling. + */ +static noinline void unlikely_7322_intr(struct qib_devdata *dd, u64 istat) +{ + if (istat & ~QIB_I_BITSEXTANT) + unknown_7322_ibits(dd, istat); + if (istat & QIB_I_GPIO) + unknown_7322_gpio_intr(dd); + if (istat & QIB_I_C_ERROR) { + qib_write_kreg(dd, kr_errmask, 0ULL); + tasklet_schedule(&dd->error_tasklet); + } + if (istat & INT_MASK_P(Err, 0) && dd->rcd[0]) + handle_7322_p_errors(dd->rcd[0]->ppd); + if (istat & INT_MASK_P(Err, 1) && dd->rcd[1]) + handle_7322_p_errors(dd->rcd[1]->ppd); +} + +/* + * Dynamically adjust the rcv int timeout for a context based on incoming + * packet rate. + */ +static void adjust_rcv_timeout(struct qib_ctxtdata *rcd, int npkts) +{ + struct qib_devdata *dd = rcd->dd; + u32 timeout = dd->cspec->rcvavail_timeout[rcd->ctxt]; + + /* + * Dynamically adjust idle timeout on chip + * based on number of packets processed. + */ + if (npkts < rcv_int_count && timeout > 2) + timeout >>= 1; + else if (npkts >= rcv_int_count && timeout < rcv_int_timeout) + timeout = min(timeout << 1, rcv_int_timeout); + else + return; + + dd->cspec->rcvavail_timeout[rcd->ctxt] = timeout; + qib_write_kreg(dd, kr_rcvavailtimeout + rcd->ctxt, timeout); +} + +/* + * This is the main interrupt handler. + * It will normally only be used for low frequency interrupts but may + * have to handle all interrupts if INTx is enabled or fewer than normal + * MSIx interrupts were allocated. + * This routine should ignore the interrupt bits for any of the + * dedicated MSIx handlers. + */ +static irqreturn_t qib_7322intr(int irq, void *data) +{ + struct qib_devdata *dd = data; + irqreturn_t ret; + u64 istat; + u64 ctxtrbits; + u64 rmask; + unsigned i; + u32 npkts; + + if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) { + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + ret = IRQ_HANDLED; + goto bail; + } + + istat = qib_read_kreg64(dd, kr_intstatus); + + if (unlikely(istat == ~0ULL)) { + qib_bad_intrstatus(dd); + qib_dev_err(dd, "Interrupt status all f's, skipping\n"); + /* don't know if it was our interrupt or not */ + ret = IRQ_NONE; + goto bail; + } + + istat &= dd->cspec->main_int_mask; + if (unlikely(!istat)) { + /* already handled, or shared and not us */ + ret = IRQ_NONE; + goto bail; + } + + qib_stats.sps_ints++; + if (dd->int_counter != (u32) -1) + dd->int_counter++; + + /* handle "errors" of various kinds first, device ahead of port */ + if (unlikely(istat & (~QIB_I_BITSEXTANT | QIB_I_GPIO | + QIB_I_C_ERROR | INT_MASK_P(Err, 0) | + INT_MASK_P(Err, 1)))) + unlikely_7322_intr(dd, istat); + + /* + * Clear the interrupt bits we found set, relatively early, so we + * "know" know the chip will have seen this by the time we process + * the queue, and will re-interrupt if necessary. The processor + * itself won't take the interrupt again until we return. + */ + qib_write_kreg(dd, kr_intclear, istat); + + /* + * Handle kernel receive queues before checking for pio buffers + * available since receives can overflow; piobuf waiters can afford + * a few extra cycles, since they were waiting anyway. + */ + ctxtrbits = istat & (QIB_I_RCVAVAIL_MASK | QIB_I_RCVURG_MASK); + if (ctxtrbits) { + rmask = (1ULL << QIB_I_RCVAVAIL_LSB) | + (1ULL << QIB_I_RCVURG_LSB); + for (i = 0; i < dd->first_user_ctxt; i++) { + if (ctxtrbits & rmask) { + ctxtrbits &= ~rmask; + if (dd->rcd[i]) + qib_kreceive(dd->rcd[i], NULL, &npkts); + } + rmask <<= 1; + } + if (ctxtrbits) { + ctxtrbits = (ctxtrbits >> QIB_I_RCVAVAIL_LSB) | + (ctxtrbits >> QIB_I_RCVURG_LSB); + qib_handle_urcv(dd, ctxtrbits); + } + } + + if (istat & (QIB_I_P_SDMAINT(0) | QIB_I_P_SDMAINT(1))) + sdma_7322_intr(dd, istat); + + if ((istat & QIB_I_SPIOBUFAVAIL) && (dd->flags & QIB_INITTED)) + qib_ib_piobufavail(dd); + + ret = IRQ_HANDLED; +bail: + return ret; +} + +/* + * Dedicated receive packet available interrupt handler. + */ +static irqreturn_t qib_7322pintr(int irq, void *data) +{ + struct qib_ctxtdata *rcd = data; + struct qib_devdata *dd = rcd->dd; + u32 npkts; + + if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + return IRQ_HANDLED; + + qib_stats.sps_ints++; + if (dd->int_counter != (u32) -1) + dd->int_counter++; + + /* Clear the interrupt bit we expect to be set. */ + qib_write_kreg(dd, kr_intclear, ((1ULL << QIB_I_RCVAVAIL_LSB) | + (1ULL << QIB_I_RCVURG_LSB)) << rcd->ctxt); + + qib_kreceive(rcd, NULL, &npkts); + + return IRQ_HANDLED; +} + +/* + * Dedicated Send buffer available interrupt handler. + */ +static irqreturn_t qib_7322bufavail(int irq, void *data) +{ + struct qib_devdata *dd = data; + + if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + return IRQ_HANDLED; + + qib_stats.sps_ints++; + if (dd->int_counter != (u32) -1) + dd->int_counter++; + + /* Clear the interrupt bit we expect to be set. */ + qib_write_kreg(dd, kr_intclear, QIB_I_SPIOBUFAVAIL); + + /* qib_ib_piobufavail() will clear the want PIO interrupt if needed */ + if (dd->flags & QIB_INITTED) + qib_ib_piobufavail(dd); + else + qib_wantpiobuf_7322_intr(dd, 0); + + return IRQ_HANDLED; +} + +/* + * Dedicated Send DMA interrupt handler. + */ +static irqreturn_t sdma_intr(int irq, void *data) +{ + struct qib_pportdata *ppd = data; + struct qib_devdata *dd = ppd->dd; + + if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + return IRQ_HANDLED; + + qib_stats.sps_ints++; + if (dd->int_counter != (u32) -1) + dd->int_counter++; + + /* Clear the interrupt bit we expect to be set. */ + qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? + INT_MASK_P(SDma, 1) : INT_MASK_P(SDma, 0)); + qib_sdma_intr(ppd); + + return IRQ_HANDLED; +} + +/* + * Dedicated Send DMA idle interrupt handler. + */ +static irqreturn_t sdma_idle_intr(int irq, void *data) +{ + struct qib_pportdata *ppd = data; + struct qib_devdata *dd = ppd->dd; + + if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + return IRQ_HANDLED; + + qib_stats.sps_ints++; + if (dd->int_counter != (u32) -1) + dd->int_counter++; + + /* Clear the interrupt bit we expect to be set. */ + qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? + INT_MASK_P(SDmaIdle, 1) : INT_MASK_P(SDmaIdle, 0)); + qib_sdma_intr(ppd); + + return IRQ_HANDLED; +} + +/* + * Dedicated Send DMA progress interrupt handler. + */ +static irqreturn_t sdma_progress_intr(int irq, void *data) +{ + struct qib_pportdata *ppd = data; + struct qib_devdata *dd = ppd->dd; + + if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + return IRQ_HANDLED; + + qib_stats.sps_ints++; + if (dd->int_counter != (u32) -1) + dd->int_counter++; + + /* Clear the interrupt bit we expect to be set. */ + qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? + INT_MASK_P(SDmaProgress, 1) : + INT_MASK_P(SDmaProgress, 0)); + qib_sdma_intr(ppd); + + return IRQ_HANDLED; +} + +/* + * Dedicated Send DMA cleanup interrupt handler. + */ +static irqreturn_t sdma_cleanup_intr(int irq, void *data) +{ + struct qib_pportdata *ppd = data; + struct qib_devdata *dd = ppd->dd; + + if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + return IRQ_HANDLED; + + qib_stats.sps_ints++; + if (dd->int_counter != (u32) -1) + dd->int_counter++; + + /* Clear the interrupt bit we expect to be set. */ + qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? + INT_MASK_PM(SDmaCleanupDone, 1) : + INT_MASK_PM(SDmaCleanupDone, 0)); + qib_sdma_process_event(ppd, qib_sdma_event_e20_hw_started); + + return IRQ_HANDLED; +} + +/* + * Set up our chip-specific interrupt handler. + * The interrupt type has already been setup, so + * we just need to do the registration and error checking. + * If we are using MSIx interrupts, we may fall back to + * INTx later, if the interrupt handler doesn't get called + * within 1/2 second (see verify_interrupt()). + */ +static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) +{ + int ret, i, msixnum; + u64 redirect[6]; + u64 mask; + const struct cpumask *local_mask; + int firstcpu, secondcpu = 0, currrcvcpu = 0; + + if (!dd->num_pports) + return; + + if (clearpend) { + /* + * if not switching interrupt types, be sure interrupts are + * disabled, and then clear anything pending at this point, + * because we are starting clean. + */ + qib_7322_set_intr_state(dd, 0); + + /* clear the reset error, init error/hwerror mask */ + qib_7322_init_hwerrors(dd); + + /* clear any interrupt bits that might be set */ + qib_write_kreg(dd, kr_intclear, ~0ULL); + + /* make sure no pending MSIx intr, and clear diag reg */ + qib_write_kreg(dd, kr_intgranted, ~0ULL); + qib_write_kreg(dd, kr_vecclr_wo_int, ~0ULL); + } + + if (!dd->cspec->num_msix_entries) { + /* Try to get INTx interrupt */ +try_intx: + if (!dd->pcidev->irq) { + qib_dev_err(dd, "irq is 0, BIOS error? " + "Interrupts won't work\n"); + goto bail; + } + ret = request_irq(dd->pcidev->irq, qib_7322intr, + IRQF_SHARED, QIB_DRV_NAME, dd); + if (ret) { + qib_dev_err(dd, "Couldn't setup INTx " + "interrupt (irq=%d): %d\n", + dd->pcidev->irq, ret); + goto bail; + } + dd->cspec->irq = dd->pcidev->irq; + dd->cspec->main_int_mask = ~0ULL; + goto bail; + } + + /* Try to get MSIx interrupts */ + memset(redirect, 0, sizeof redirect); + mask = ~0ULL; + msixnum = 0; + local_mask = cpumask_of_pcibus(dd->pcidev->bus); + firstcpu = cpumask_first(local_mask); + if (firstcpu >= nr_cpu_ids || + cpumask_weight(local_mask) == num_online_cpus()) { + local_mask = topology_core_cpumask(0); + firstcpu = cpumask_first(local_mask); + } + if (firstcpu < nr_cpu_ids) { + secondcpu = cpumask_next(firstcpu, local_mask); + if (secondcpu >= nr_cpu_ids) + secondcpu = firstcpu; + currrcvcpu = secondcpu; + } + for (i = 0; msixnum < dd->cspec->num_msix_entries; i++) { + irq_handler_t handler; + void *arg; + u64 val; + int lsb, reg, sh; + + dd->cspec->msix_entries[msixnum]. + name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1] + = '\0'; + if (i < ARRAY_SIZE(irq_table)) { + if (irq_table[i].port) { + /* skip if for a non-configured port */ + if (irq_table[i].port > dd->num_pports) + continue; + arg = dd->pport + irq_table[i].port - 1; + } else + arg = dd; + lsb = irq_table[i].lsb; + handler = irq_table[i].handler; + snprintf(dd->cspec->msix_entries[msixnum].name, + sizeof(dd->cspec->msix_entries[msixnum].name) + - 1, + QIB_DRV_NAME "%d%s", dd->unit, + irq_table[i].name); + } else { + unsigned ctxt; + + ctxt = i - ARRAY_SIZE(irq_table); + /* per krcvq context receive interrupt */ + arg = dd->rcd[ctxt]; + if (!arg) + continue; + if (qib_krcvq01_no_msi && ctxt < 2) + continue; + lsb = QIB_I_RCVAVAIL_LSB + ctxt; + handler = qib_7322pintr; + snprintf(dd->cspec->msix_entries[msixnum].name, + sizeof(dd->cspec->msix_entries[msixnum].name) + - 1, + QIB_DRV_NAME "%d (kctx)", dd->unit); + } + ret = request_irq( + dd->cspec->msix_entries[msixnum].msix.vector, + handler, 0, dd->cspec->msix_entries[msixnum].name, + arg); + if (ret) { + /* + * Shouldn't happen since the enable said we could + * have as many as we are trying to setup here. + */ + qib_dev_err(dd, "Couldn't setup MSIx " + "interrupt (vec=%d, irq=%d): %d\n", msixnum, + dd->cspec->msix_entries[msixnum].msix.vector, + ret); + qib_7322_nomsix(dd); + goto try_intx; + } + dd->cspec->msix_entries[msixnum].arg = arg; + if (lsb >= 0) { + reg = lsb / IBA7322_REDIRECT_VEC_PER_REG; + sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) * + SYM_LSB(IntRedirect0, vec1); + mask &= ~(1ULL << lsb); + redirect[reg] |= ((u64) msixnum) << sh; + } + val = qib_read_kreg64(dd, 2 * msixnum + 1 + + (QIB_7322_MsixTable_OFFS / sizeof(u64))); + if (firstcpu < nr_cpu_ids && + zalloc_cpumask_var( + &dd->cspec->msix_entries[msixnum].mask, + GFP_KERNEL)) { + if (handler == qib_7322pintr) { + cpumask_set_cpu(currrcvcpu, + dd->cspec->msix_entries[msixnum].mask); + currrcvcpu = cpumask_next(currrcvcpu, + local_mask); + if (currrcvcpu >= nr_cpu_ids) + currrcvcpu = secondcpu; + } else { + cpumask_set_cpu(firstcpu, + dd->cspec->msix_entries[msixnum].mask); + } + irq_set_affinity_hint( + dd->cspec->msix_entries[msixnum].msix.vector, + dd->cspec->msix_entries[msixnum].mask); + } + msixnum++; + } + /* Initialize the vector mapping */ + for (i = 0; i < ARRAY_SIZE(redirect); i++) + qib_write_kreg(dd, kr_intredirect + i, redirect[i]); + dd->cspec->main_int_mask = mask; + tasklet_init(&dd->error_tasklet, qib_error_tasklet, + (unsigned long)dd); +bail:; +} + +/** + * qib_7322_boardname - fill in the board name and note features + * @dd: the qlogic_ib device + * + * info will be based on the board revision register + */ +static unsigned qib_7322_boardname(struct qib_devdata *dd) +{ + /* Will need enumeration of board-types here */ + char *n; + u32 boardid, namelen; + unsigned features = DUAL_PORT_CAP; + + boardid = SYM_FIELD(dd->revision, Revision, BoardID); + + switch (boardid) { + case 0: + n = "InfiniPath_QLE7342_Emulation"; + break; + case 1: + n = "InfiniPath_QLE7340"; + dd->flags |= QIB_HAS_QSFP; + features = PORT_SPD_CAP; + break; + case 2: + n = "InfiniPath_QLE7342"; + dd->flags |= QIB_HAS_QSFP; + break; + case 3: + n = "InfiniPath_QMI7342"; + break; + case 4: + n = "InfiniPath_Unsupported7342"; + qib_dev_err(dd, "Unsupported version of QMH7342\n"); + features = 0; + break; + case BOARD_QMH7342: + n = "InfiniPath_QMH7342"; + features = 0x24; + break; + case BOARD_QME7342: + n = "InfiniPath_QME7342"; + break; + case 8: + n = "InfiniPath_QME7362"; + dd->flags |= QIB_HAS_QSFP; + break; + case 15: + n = "InfiniPath_QLE7342_TEST"; + dd->flags |= QIB_HAS_QSFP; + break; + default: + n = "InfiniPath_QLE73xy_UNKNOWN"; + qib_dev_err(dd, "Unknown 7322 board type %u\n", boardid); + break; + } + dd->board_atten = 1; /* index into txdds_Xdr */ + + namelen = strlen(n) + 1; + dd->boardname = kmalloc(namelen, GFP_KERNEL); + if (!dd->boardname) + qib_dev_err(dd, "Failed allocation for board name: %s\n", n); + else + snprintf(dd->boardname, namelen, "%s", n); + + snprintf(dd->boardversion, sizeof(dd->boardversion), + "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", + QIB_CHIP_VERS_MAJ, QIB_CHIP_VERS_MIN, dd->boardname, + (unsigned)SYM_FIELD(dd->revision, Revision_R, Arch), + dd->majrev, dd->minrev, + (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); + + if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) { + qib_devinfo(dd->pcidev, "IB%u: Forced to single port mode" + " by module parameter\n", dd->unit); + features &= PORT_SPD_CAP; + } + + return features; +} + +/* + * This routine sleeps, so it can only be called from user context, not + * from interrupt context. + */ +static int qib_do_7322_reset(struct qib_devdata *dd) +{ + u64 val; + u64 *msix_vecsave; + int i, msix_entries, ret = 1; + u16 cmdval; + u8 int_line, clinesz; + unsigned long flags; + + /* Use dev_err so it shows up in logs, etc. */ + qib_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->unit); + + qib_pcie_getcmd(dd, &cmdval, &int_line, &clinesz); + + msix_entries = dd->cspec->num_msix_entries; + + /* no interrupts till re-initted */ + qib_7322_set_intr_state(dd, 0); + + if (msix_entries) { + qib_7322_nomsix(dd); + /* can be up to 512 bytes, too big for stack */ + msix_vecsave = kmalloc(2 * dd->cspec->num_msix_entries * + sizeof(u64), GFP_KERNEL); + if (!msix_vecsave) + qib_dev_err(dd, "No mem to save MSIx data\n"); + } else + msix_vecsave = NULL; + + /* + * Core PCI (as of 2.6.18) doesn't save or rewrite the full vector + * info that is set up by the BIOS, so we have to save and restore + * it ourselves. There is some risk something could change it, + * after we save it, but since we have disabled the MSIx, it + * shouldn't be touched... + */ + for (i = 0; i < msix_entries; i++) { + u64 vecaddr, vecdata; + vecaddr = qib_read_kreg64(dd, 2 * i + + (QIB_7322_MsixTable_OFFS / sizeof(u64))); + vecdata = qib_read_kreg64(dd, 1 + 2 * i + + (QIB_7322_MsixTable_OFFS / sizeof(u64))); + if (msix_vecsave) { + msix_vecsave[2 * i] = vecaddr; + /* save it without the masked bit set */ + msix_vecsave[1 + 2 * i] = vecdata & ~0x100000000ULL; + } + } + + dd->pport->cpspec->ibdeltainprog = 0; + dd->pport->cpspec->ibsymdelta = 0; + dd->pport->cpspec->iblnkerrdelta = 0; + dd->pport->cpspec->ibmalfdelta = 0; + dd->int_counter = 0; /* so we check interrupts work again */ + + /* + * Keep chip from being accessed until we are ready. Use + * writeq() directly, to allow the write even though QIB_PRESENT + * isn't set. + */ + dd->flags &= ~(QIB_INITTED | QIB_PRESENT | QIB_BADINTR); + dd->flags |= QIB_DOING_RESET; + val = dd->control | QLOGIC_IB_C_RESET; + writeq(val, &dd->kregbase[kr_control]); + + for (i = 1; i <= 5; i++) { + /* + * Allow MBIST, etc. to complete; longer on each retry. + * We sometimes get machine checks from bus timeout if no + * response, so for now, make it *really* long. + */ + msleep(1000 + (1 + i) * 3000); + + qib_pcie_reenable(dd, cmdval, int_line, clinesz); + + /* + * Use readq directly, so we don't need to mark it as PRESENT + * until we get a successful indication that all is well. + */ + val = readq(&dd->kregbase[kr_revision]); + if (val == dd->revision) + break; + if (i == 5) { + qib_dev_err(dd, "Failed to initialize after reset, " + "unusable\n"); + ret = 0; + goto bail; + } + } + + dd->flags |= QIB_PRESENT; /* it's back */ + + if (msix_entries) { + /* restore the MSIx vector address and data if saved above */ + for (i = 0; i < msix_entries; i++) { + dd->cspec->msix_entries[i].msix.entry = i; + if (!msix_vecsave || !msix_vecsave[2 * i]) + continue; + qib_write_kreg(dd, 2 * i + + (QIB_7322_MsixTable_OFFS / sizeof(u64)), + msix_vecsave[2 * i]); + qib_write_kreg(dd, 1 + 2 * i + + (QIB_7322_MsixTable_OFFS / sizeof(u64)), + msix_vecsave[1 + 2 * i]); + } + } + + /* initialize the remaining registers. */ + for (i = 0; i < dd->num_pports; ++i) + write_7322_init_portregs(&dd->pport[i]); + write_7322_initregs(dd); + + if (qib_pcie_params(dd, dd->lbus_width, + &dd->cspec->num_msix_entries, + dd->cspec->msix_entries)) + qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; " + "continuing anyway\n"); + + qib_setup_7322_interrupt(dd, 1); + + for (i = 0; i < dd->num_pports; ++i) { + struct qib_pportdata *ppd = &dd->pport[i]; + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_FORCE_NOTIFY; + ppd->lflags &= ~QIBL_IB_AUTONEG_FAILED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } + +bail: + dd->flags &= ~QIB_DOING_RESET; /* OK or not, no longer resetting */ + kfree(msix_vecsave); + return ret; +} + +/** + * qib_7322_put_tid - write a TID to the chip + * @dd: the qlogic_ib device + * @tidptr: pointer to the expected TID (in chip) to update + * @tidtype: 0 for eager, 1 for expected + * @pa: physical address of in memory buffer; tidinvalid if freeing + */ +static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, + u32 type, unsigned long pa) +{ + if (!(dd->flags & QIB_PRESENT)) + return; + if (pa != dd->tidinvalid) { + u64 chippa = pa >> IBA7322_TID_PA_SHIFT; + + /* paranoia checks */ + if (pa != (chippa << IBA7322_TID_PA_SHIFT)) { + qib_dev_err(dd, "Physaddr %lx not 2KB aligned!\n", + pa); + return; + } + if (chippa >= (1UL << IBA7322_TID_SZ_SHIFT)) { + qib_dev_err(dd, "Physical page address 0x%lx " + "larger than supported\n", pa); + return; + } + + if (type == RCVHQ_RCV_TYPE_EAGER) + chippa |= dd->tidtemplate; + else /* for now, always full 4KB page */ + chippa |= IBA7322_TID_SZ_4K; + pa = chippa; + } + writeq(pa, tidptr); + mmiowb(); +} + +/** + * qib_7322_clear_tids - clear all TID entries for a ctxt, expected and eager + * @dd: the qlogic_ib device + * @ctxt: the ctxt + * + * clear all TID entries for a ctxt, expected and eager. + * Used from qib_close(). + */ +static void qib_7322_clear_tids(struct qib_devdata *dd, + struct qib_ctxtdata *rcd) +{ + u64 __iomem *tidbase; + unsigned long tidinv; + u32 ctxt; + int i; + + if (!dd->kregbase || !rcd) + return; + + ctxt = rcd->ctxt; + + tidinv = dd->tidinvalid; + tidbase = (u64 __iomem *) + ((char __iomem *) dd->kregbase + + dd->rcvtidbase + + ctxt * dd->rcvtidcnt * sizeof(*tidbase)); + + for (i = 0; i < dd->rcvtidcnt; i++) + qib_7322_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED, + tidinv); + + tidbase = (u64 __iomem *) + ((char __iomem *) dd->kregbase + + dd->rcvegrbase + + rcd->rcvegr_tid_base * sizeof(*tidbase)); + + for (i = 0; i < rcd->rcvegrcnt; i++) + qib_7322_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER, + tidinv); +} + +/** + * qib_7322_tidtemplate - setup constants for TID updates + * @dd: the qlogic_ib device + * + * We setup stuff that we use a lot, to avoid calculating each time + */ +static void qib_7322_tidtemplate(struct qib_devdata *dd) +{ + /* + * For now, we always allocate 4KB buffers (at init) so we can + * receive max size packets. We may want a module parameter to + * specify 2KB or 4KB and/or make it per port instead of per device + * for those who want to reduce memory footprint. Note that the + * rcvhdrentsize size must be large enough to hold the largest + * IB header (currently 96 bytes) that we expect to handle (plus of + * course the 2 dwords of RHF). + */ + if (dd->rcvegrbufsize == 2048) + dd->tidtemplate = IBA7322_TID_SZ_2K; + else if (dd->rcvegrbufsize == 4096) + dd->tidtemplate = IBA7322_TID_SZ_4K; + dd->tidinvalid = 0; +} + +/** + * qib_init_7322_get_base_info - set chip-specific flags for user code + * @rcd: the qlogic_ib ctxt + * @kbase: qib_base_info pointer + * + * We set the PCIE flag because the lower bandwidth on PCIe vs + * HyperTransport can affect some user packet algorithims. + */ + +static int qib_7322_get_base_info(struct qib_ctxtdata *rcd, + struct qib_base_info *kinfo) +{ + kinfo->spi_runtime_flags |= QIB_RUNTIME_CTXT_MSB_IN_QP | + QIB_RUNTIME_PCIE | QIB_RUNTIME_NODMA_RTAIL | + QIB_RUNTIME_HDRSUPP | QIB_RUNTIME_SDMA; + if (rcd->dd->cspec->r1) + kinfo->spi_runtime_flags |= QIB_RUNTIME_RCHK; + if (rcd->dd->flags & QIB_USE_SPCL_TRIG) + kinfo->spi_runtime_flags |= QIB_RUNTIME_SPECIAL_TRIGGER; + + return 0; +} + +static struct qib_message_header * +qib_7322_get_msgheader(struct qib_devdata *dd, __le32 *rhf_addr) +{ + u32 offset = qib_hdrget_offset(rhf_addr); + + return (struct qib_message_header *) + (rhf_addr - dd->rhf_offset + offset); +} + +/* + * Configure number of contexts. + */ +static void qib_7322_config_ctxts(struct qib_devdata *dd) +{ + unsigned long flags; + u32 nchipctxts; + + nchipctxts = qib_read_kreg32(dd, kr_contextcnt); + dd->cspec->numctxts = nchipctxts; + if (qib_n_krcv_queues > 1 && dd->num_pports) { + dd->first_user_ctxt = NUM_IB_PORTS + + (qib_n_krcv_queues - 1) * dd->num_pports; + if (dd->first_user_ctxt > nchipctxts) + dd->first_user_ctxt = nchipctxts; + dd->n_krcv_queues = dd->first_user_ctxt / dd->num_pports; + } else { + dd->first_user_ctxt = NUM_IB_PORTS; + dd->n_krcv_queues = 1; + } + + if (!qib_cfgctxts) { + int nctxts = dd->first_user_ctxt + num_online_cpus(); + + if (nctxts <= 6) + dd->ctxtcnt = 6; + else if (nctxts <= 10) + dd->ctxtcnt = 10; + else if (nctxts <= nchipctxts) + dd->ctxtcnt = nchipctxts; + } else if (qib_cfgctxts < dd->num_pports) + dd->ctxtcnt = dd->num_pports; + else if (qib_cfgctxts <= nchipctxts) + dd->ctxtcnt = qib_cfgctxts; + if (!dd->ctxtcnt) /* none of the above, set to max */ + dd->ctxtcnt = nchipctxts; + + /* + * Chip can be configured for 6, 10, or 18 ctxts, and choice + * affects number of eager TIDs per ctxt (1K, 2K, 4K). + * Lock to be paranoid about later motion, etc. + */ + spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags); + if (dd->ctxtcnt > 10) + dd->rcvctrl |= 2ULL << SYM_LSB(RcvCtrl, ContextCfg); + else if (dd->ctxtcnt > 6) + dd->rcvctrl |= 1ULL << SYM_LSB(RcvCtrl, ContextCfg); + /* else configure for default 6 receive ctxts */ + + /* The XRC opcode is 5. */ + dd->rcvctrl |= 5ULL << SYM_LSB(RcvCtrl, XrcTypeCode); + + /* + * RcvCtrl *must* be written here so that the + * chip understands how to change rcvegrcnt below. + */ + qib_write_kreg(dd, kr_rcvctrl, dd->rcvctrl); + spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags); + + /* kr_rcvegrcnt changes based on the number of contexts enabled */ + dd->cspec->rcvegrcnt = qib_read_kreg32(dd, kr_rcvegrcnt); + if (qib_rcvhdrcnt) + dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt, qib_rcvhdrcnt); + else + dd->rcvhdrcnt = 2 * max(dd->cspec->rcvegrcnt, + dd->num_pports > 1 ? 1024U : 2048U); +} + +static int qib_7322_get_ib_cfg(struct qib_pportdata *ppd, int which) +{ + + int lsb, ret = 0; + u64 maskr; /* right-justified mask */ + + switch (which) { + + case QIB_IB_CFG_LWID_ENB: /* Get allowed Link-width */ + ret = ppd->link_width_enabled; + goto done; + + case QIB_IB_CFG_LWID: /* Get currently active Link-width */ + ret = ppd->link_width_active; + goto done; + + case QIB_IB_CFG_SPD_ENB: /* Get allowed Link speeds */ + ret = ppd->link_speed_enabled; + goto done; + + case QIB_IB_CFG_SPD: /* Get current Link spd */ + ret = ppd->link_speed_active; + goto done; + + case QIB_IB_CFG_RXPOL_ENB: /* Get Auto-RX-polarity enable */ + lsb = SYM_LSB(IBCCtrlB_0, IB_POLARITY_REV_SUPP); + maskr = SYM_RMASK(IBCCtrlB_0, IB_POLARITY_REV_SUPP); + break; + + case QIB_IB_CFG_LREV_ENB: /* Get Auto-Lane-reversal enable */ + lsb = SYM_LSB(IBCCtrlB_0, IB_LANE_REV_SUPPORTED); + maskr = SYM_RMASK(IBCCtrlB_0, IB_LANE_REV_SUPPORTED); + break; + + case QIB_IB_CFG_LINKLATENCY: + ret = qib_read_kreg_port(ppd, krp_ibcstatus_b) & + SYM_MASK(IBCStatusB_0, LinkRoundTripLatency); + goto done; + + case QIB_IB_CFG_OP_VLS: + ret = ppd->vls_operational; + goto done; + + case QIB_IB_CFG_VL_HIGH_CAP: + ret = 16; + goto done; + + case QIB_IB_CFG_VL_LOW_CAP: + ret = 16; + goto done; + + case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */ + ret = SYM_FIELD(ppd->cpspec->ibcctrl_a, IBCCtrlA_0, + OverrunThreshold); + goto done; + + case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */ + ret = SYM_FIELD(ppd->cpspec->ibcctrl_a, IBCCtrlA_0, + PhyerrThreshold); + goto done; + + case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */ + /* will only take effect when the link state changes */ + ret = (ppd->cpspec->ibcctrl_a & + SYM_MASK(IBCCtrlA_0, LinkDownDefaultState)) ? + IB_LINKINITCMD_SLEEP : IB_LINKINITCMD_POLL; + goto done; + + case QIB_IB_CFG_HRTBT: /* Get Heartbeat off/enable/auto */ + lsb = IBA7322_IBC_HRTBT_LSB; + maskr = IBA7322_IBC_HRTBT_RMASK; /* OR of AUTO and ENB */ + break; + + case QIB_IB_CFG_PMA_TICKS: + /* + * 0x00 = 10x link transfer rate or 4 nsec. for 2.5Gbs + * Since the clock is always 250MHz, the value is 3, 1 or 0. + */ + if (ppd->link_speed_active == QIB_IB_QDR) + ret = 3; + else if (ppd->link_speed_active == QIB_IB_DDR) + ret = 1; + else + ret = 0; + goto done; + + default: + ret = -EINVAL; + goto done; + } + ret = (int)((ppd->cpspec->ibcctrl_b >> lsb) & maskr); +done: + return ret; +} + +/* + * Below again cribbed liberally from older version. Do not lean + * heavily on it. + */ +#define IBA7322_IBC_DLIDLMC_SHIFT QIB_7322_IBCCtrlB_0_IB_DLID_LSB +#define IBA7322_IBC_DLIDLMC_MASK (QIB_7322_IBCCtrlB_0_IB_DLID_RMASK \ + | (QIB_7322_IBCCtrlB_0_IB_DLID_MASK_RMASK << 16)) + +static int qib_7322_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) +{ + struct qib_devdata *dd = ppd->dd; + u64 maskr; /* right-justified mask */ + int lsb, ret = 0; + u16 lcmd, licmd; + unsigned long flags; + + switch (which) { + case QIB_IB_CFG_LIDLMC: + /* + * Set LID and LMC. Combined to avoid possible hazard + * caller puts LMC in 16MSbits, DLID in 16LSbits of val + */ + lsb = IBA7322_IBC_DLIDLMC_SHIFT; + maskr = IBA7322_IBC_DLIDLMC_MASK; + /* + * For header-checking, the SLID in the packet will + * be masked with SendIBSLMCMask, and compared + * with SendIBSLIDAssignMask. Make sure we do not + * set any bits not covered by the mask, or we get + * false-positives. + */ + qib_write_kreg_port(ppd, krp_sendslid, + val & (val >> 16) & SendIBSLIDAssignMask); + qib_write_kreg_port(ppd, krp_sendslidmask, + (val >> 16) & SendIBSLMCMask); + break; + + case QIB_IB_CFG_LWID_ENB: /* set allowed Link-width */ + ppd->link_width_enabled = val; + /* convert IB value to chip register value */ + if (val == IB_WIDTH_1X) + val = 0; + else if (val == IB_WIDTH_4X) + val = 1; + else + val = 3; + maskr = SYM_RMASK(IBCCtrlB_0, IB_NUM_CHANNELS); + lsb = SYM_LSB(IBCCtrlB_0, IB_NUM_CHANNELS); + break; + + case QIB_IB_CFG_SPD_ENB: /* set allowed Link speeds */ + /* + * As with width, only write the actual register if the + * link is currently down, otherwise takes effect on next + * link change. Since setting is being explicitly requested + * (via MAD or sysfs), clear autoneg failure status if speed + * autoneg is enabled. + */ + ppd->link_speed_enabled = val; + val <<= IBA7322_IBC_SPEED_LSB; + maskr = IBA7322_IBC_SPEED_MASK | IBA7322_IBC_IBTA_1_2_MASK | + IBA7322_IBC_MAX_SPEED_MASK; + if (val & (val - 1)) { + /* Muliple speeds enabled */ + val |= IBA7322_IBC_IBTA_1_2_MASK | + IBA7322_IBC_MAX_SPEED_MASK; + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_AUTONEG_FAILED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } else if (val & IBA7322_IBC_SPEED_QDR) + val |= IBA7322_IBC_IBTA_1_2_MASK; + /* IBTA 1.2 mode + min/max + speed bits are contiguous */ + lsb = SYM_LSB(IBCCtrlB_0, IB_ENHANCED_MODE); + break; + + case QIB_IB_CFG_RXPOL_ENB: /* set Auto-RX-polarity enable */ + lsb = SYM_LSB(IBCCtrlB_0, IB_POLARITY_REV_SUPP); + maskr = SYM_RMASK(IBCCtrlB_0, IB_POLARITY_REV_SUPP); + break; + + case QIB_IB_CFG_LREV_ENB: /* set Auto-Lane-reversal enable */ + lsb = SYM_LSB(IBCCtrlB_0, IB_LANE_REV_SUPPORTED); + maskr = SYM_RMASK(IBCCtrlB_0, IB_LANE_REV_SUPPORTED); + break; + + case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */ + maskr = SYM_FIELD(ppd->cpspec->ibcctrl_a, IBCCtrlA_0, + OverrunThreshold); + if (maskr != val) { + ppd->cpspec->ibcctrl_a &= + ~SYM_MASK(IBCCtrlA_0, OverrunThreshold); + ppd->cpspec->ibcctrl_a |= (u64) val << + SYM_LSB(IBCCtrlA_0, OverrunThreshold); + qib_write_kreg_port(ppd, krp_ibcctrl_a, + ppd->cpspec->ibcctrl_a); + qib_write_kreg(dd, kr_scratch, 0ULL); + } + goto bail; + + case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */ + maskr = SYM_FIELD(ppd->cpspec->ibcctrl_a, IBCCtrlA_0, + PhyerrThreshold); + if (maskr != val) { + ppd->cpspec->ibcctrl_a &= + ~SYM_MASK(IBCCtrlA_0, PhyerrThreshold); + ppd->cpspec->ibcctrl_a |= (u64) val << + SYM_LSB(IBCCtrlA_0, PhyerrThreshold); + qib_write_kreg_port(ppd, krp_ibcctrl_a, + ppd->cpspec->ibcctrl_a); + qib_write_kreg(dd, kr_scratch, 0ULL); + } + goto bail; + + case QIB_IB_CFG_PKEYS: /* update pkeys */ + maskr = (u64) ppd->pkeys[0] | ((u64) ppd->pkeys[1] << 16) | + ((u64) ppd->pkeys[2] << 32) | + ((u64) ppd->pkeys[3] << 48); + qib_write_kreg_port(ppd, krp_partitionkey, maskr); + goto bail; + + case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */ + /* will only take effect when the link state changes */ + if (val == IB_LINKINITCMD_POLL) + ppd->cpspec->ibcctrl_a &= + ~SYM_MASK(IBCCtrlA_0, LinkDownDefaultState); + else /* SLEEP */ + ppd->cpspec->ibcctrl_a |= + SYM_MASK(IBCCtrlA_0, LinkDownDefaultState); + qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a); + qib_write_kreg(dd, kr_scratch, 0ULL); + goto bail; + + case QIB_IB_CFG_MTU: /* update the MTU in IBC */ + /* + * Update our housekeeping variables, and set IBC max + * size, same as init code; max IBC is max we allow in + * buffer, less the qword pbc, plus 1 for ICRC, in dwords + * Set even if it's unchanged, print debug message only + * on changes. + */ + val = (ppd->ibmaxlen >> 2) + 1; + ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0, MaxPktLen); + ppd->cpspec->ibcctrl_a |= (u64)val << + SYM_LSB(IBCCtrlA_0, MaxPktLen); + qib_write_kreg_port(ppd, krp_ibcctrl_a, + ppd->cpspec->ibcctrl_a); + qib_write_kreg(dd, kr_scratch, 0ULL); + goto bail; + + case QIB_IB_CFG_LSTATE: /* set the IB link state */ + switch (val & 0xffff0000) { + case IB_LINKCMD_DOWN: + lcmd = QLOGIC_IB_IBCC_LINKCMD_DOWN; + ppd->cpspec->ibmalfusesnap = 1; + ppd->cpspec->ibmalfsnap = read_7322_creg32_port(ppd, + crp_errlink); + if (!ppd->cpspec->ibdeltainprog && + qib_compat_ddr_negotiate) { + ppd->cpspec->ibdeltainprog = 1; + ppd->cpspec->ibsymsnap = + read_7322_creg32_port(ppd, + crp_ibsymbolerr); + ppd->cpspec->iblnkerrsnap = + read_7322_creg32_port(ppd, + crp_iblinkerrrecov); + } + break; + + case IB_LINKCMD_ARMED: + lcmd = QLOGIC_IB_IBCC_LINKCMD_ARMED; + if (ppd->cpspec->ibmalfusesnap) { + ppd->cpspec->ibmalfusesnap = 0; + ppd->cpspec->ibmalfdelta += + read_7322_creg32_port(ppd, + crp_errlink) - + ppd->cpspec->ibmalfsnap; + } + break; + + case IB_LINKCMD_ACTIVE: + lcmd = QLOGIC_IB_IBCC_LINKCMD_ACTIVE; + break; + + default: + ret = -EINVAL; + qib_dev_err(dd, "bad linkcmd req 0x%x\n", val >> 16); + goto bail; + } + switch (val & 0xffff) { + case IB_LINKINITCMD_NOP: + licmd = 0; + break; + + case IB_LINKINITCMD_POLL: + licmd = QLOGIC_IB_IBCC_LINKINITCMD_POLL; + break; + + case IB_LINKINITCMD_SLEEP: + licmd = QLOGIC_IB_IBCC_LINKINITCMD_SLEEP; + break; + + case IB_LINKINITCMD_DISABLE: + licmd = QLOGIC_IB_IBCC_LINKINITCMD_DISABLE; + ppd->cpspec->chase_end = 0; + /* + * stop state chase counter and timer, if running. + * wait forpending timer, but don't clear .data (ppd)! + */ + if (ppd->cpspec->chase_timer.expires) { + del_timer_sync(&ppd->cpspec->chase_timer); + ppd->cpspec->chase_timer.expires = 0; + } + break; + + default: + ret = -EINVAL; + qib_dev_err(dd, "bad linkinitcmd req 0x%x\n", + val & 0xffff); + goto bail; + } + qib_set_ib_7322_lstate(ppd, lcmd, licmd); + goto bail; + + case QIB_IB_CFG_OP_VLS: + if (ppd->vls_operational != val) { + ppd->vls_operational = val; + set_vls(ppd); + } + goto bail; + + case QIB_IB_CFG_VL_HIGH_LIMIT: + qib_write_kreg_port(ppd, krp_highprio_limit, val); + goto bail; + + case QIB_IB_CFG_HRTBT: /* set Heartbeat off/enable/auto */ + if (val > 3) { + ret = -EINVAL; + goto bail; + } + lsb = IBA7322_IBC_HRTBT_LSB; + maskr = IBA7322_IBC_HRTBT_RMASK; /* OR of AUTO and ENB */ + break; + + case QIB_IB_CFG_PORT: + /* val is the port number of the switch we are connected to. */ + if (ppd->dd->cspec->r1) { + cancel_delayed_work(&ppd->cpspec->ipg_work); + ppd->cpspec->ipg_tries = 0; + } + goto bail; + + default: + ret = -EINVAL; + goto bail; + } + ppd->cpspec->ibcctrl_b &= ~(maskr << lsb); + ppd->cpspec->ibcctrl_b |= (((u64) val & maskr) << lsb); + qib_write_kreg_port(ppd, krp_ibcctrl_b, ppd->cpspec->ibcctrl_b); + qib_write_kreg(dd, kr_scratch, 0); +bail: + return ret; +} + +static int qib_7322_set_loopback(struct qib_pportdata *ppd, const char *what) +{ + int ret = 0; + u64 val, ctrlb; + + /* only IBC loopback, may add serdes and xgxs loopbacks later */ + if (!strncmp(what, "ibc", 3)) { + ppd->cpspec->ibcctrl_a |= SYM_MASK(IBCCtrlA_0, + Loopback); + val = 0; /* disable heart beat, so link will come up */ + qib_devinfo(ppd->dd->pcidev, "Enabling IB%u:%u IBC loopback\n", + ppd->dd->unit, ppd->port); + } else if (!strncmp(what, "off", 3)) { + ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0, + Loopback); + /* enable heart beat again */ + val = IBA7322_IBC_HRTBT_RMASK << IBA7322_IBC_HRTBT_LSB; + qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback " + "(normal)\n", ppd->dd->unit, ppd->port); + } else + ret = -EINVAL; + if (!ret) { + qib_write_kreg_port(ppd, krp_ibcctrl_a, + ppd->cpspec->ibcctrl_a); + ctrlb = ppd->cpspec->ibcctrl_b & ~(IBA7322_IBC_HRTBT_MASK + << IBA7322_IBC_HRTBT_LSB); + ppd->cpspec->ibcctrl_b = ctrlb | val; + qib_write_kreg_port(ppd, krp_ibcctrl_b, + ppd->cpspec->ibcctrl_b); + qib_write_kreg(ppd->dd, kr_scratch, 0); + } + return ret; +} + +static void get_vl_weights(struct qib_pportdata *ppd, unsigned regno, + struct ib_vl_weight_elem *vl) +{ + unsigned i; + + for (i = 0; i < 16; i++, regno++, vl++) { + u32 val = qib_read_kreg_port(ppd, regno); + + vl->vl = (val >> SYM_LSB(LowPriority0_0, VirtualLane)) & + SYM_RMASK(LowPriority0_0, VirtualLane); + vl->weight = (val >> SYM_LSB(LowPriority0_0, Weight)) & + SYM_RMASK(LowPriority0_0, Weight); + } +} + +static void set_vl_weights(struct qib_pportdata *ppd, unsigned regno, + struct ib_vl_weight_elem *vl) +{ + unsigned i; + + for (i = 0; i < 16; i++, regno++, vl++) { + u64 val; + + val = ((vl->vl & SYM_RMASK(LowPriority0_0, VirtualLane)) << + SYM_LSB(LowPriority0_0, VirtualLane)) | + ((vl->weight & SYM_RMASK(LowPriority0_0, Weight)) << + SYM_LSB(LowPriority0_0, Weight)); + qib_write_kreg_port(ppd, regno, val); + } + if (!(ppd->p_sendctrl & SYM_MASK(SendCtrl_0, IBVLArbiterEn))) { + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + + spin_lock_irqsave(&dd->sendctrl_lock, flags); + ppd->p_sendctrl |= SYM_MASK(SendCtrl_0, IBVLArbiterEn); + qib_write_kreg_port(ppd, krp_sendctrl, ppd->p_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + } +} + +static int qib_7322_get_ib_table(struct qib_pportdata *ppd, int which, void *t) +{ + switch (which) { + case QIB_IB_TBL_VL_HIGH_ARB: + get_vl_weights(ppd, krp_highprio_0, t); + break; + + case QIB_IB_TBL_VL_LOW_ARB: + get_vl_weights(ppd, krp_lowprio_0, t); + break; + + default: + return -EINVAL; + } + return 0; +} + +static int qib_7322_set_ib_table(struct qib_pportdata *ppd, int which, void *t) +{ + switch (which) { + case QIB_IB_TBL_VL_HIGH_ARB: + set_vl_weights(ppd, krp_highprio_0, t); + break; + + case QIB_IB_TBL_VL_LOW_ARB: + set_vl_weights(ppd, krp_lowprio_0, t); + break; + + default: + return -EINVAL; + } + return 0; +} + +static void qib_update_7322_usrhead(struct qib_ctxtdata *rcd, u64 hd, + u32 updegr, u32 egrhd, u32 npkts) +{ + /* + * Need to write timeout register before updating rcvhdrhead to ensure + * that the timer is enabled on reception of a packet. + */ + if (hd >> IBA7322_HDRHEAD_PKTINT_SHIFT) + adjust_rcv_timeout(rcd, npkts); + if (updegr) + qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt); + mmiowb(); + qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); + qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); + mmiowb(); +} + +static u32 qib_7322_hdrqempty(struct qib_ctxtdata *rcd) +{ + u32 head, tail; + + head = qib_read_ureg32(rcd->dd, ur_rcvhdrhead, rcd->ctxt); + if (rcd->rcvhdrtail_kvaddr) + tail = qib_get_rcvhdrtail(rcd); + else + tail = qib_read_ureg32(rcd->dd, ur_rcvhdrtail, rcd->ctxt); + return head == tail; +} + +#define RCVCTRL_COMMON_MODS (QIB_RCVCTRL_CTXT_ENB | \ + QIB_RCVCTRL_CTXT_DIS | \ + QIB_RCVCTRL_TIDFLOW_ENB | \ + QIB_RCVCTRL_TIDFLOW_DIS | \ + QIB_RCVCTRL_TAILUPD_ENB | \ + QIB_RCVCTRL_TAILUPD_DIS | \ + QIB_RCVCTRL_INTRAVAIL_ENB | \ + QIB_RCVCTRL_INTRAVAIL_DIS | \ + QIB_RCVCTRL_BP_ENB | \ + QIB_RCVCTRL_BP_DIS) + +#define RCVCTRL_PORT_MODS (QIB_RCVCTRL_CTXT_ENB | \ + QIB_RCVCTRL_CTXT_DIS | \ + QIB_RCVCTRL_PKEY_DIS | \ + QIB_RCVCTRL_PKEY_ENB) + +/* + * Modify the RCVCTRL register in chip-specific way. This + * is a function because bit positions and (future) register + * location is chip-specifc, but the needed operations are + * generic. is a bit-mask because we often want to + * do multiple modifications. + */ +static void rcvctrl_7322_mod(struct qib_pportdata *ppd, unsigned int op, + int ctxt) +{ + struct qib_devdata *dd = ppd->dd; + struct qib_ctxtdata *rcd; + u64 mask, val; + unsigned long flags; + + spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags); + + if (op & QIB_RCVCTRL_TIDFLOW_ENB) + dd->rcvctrl |= SYM_MASK(RcvCtrl, TidFlowEnable); + if (op & QIB_RCVCTRL_TIDFLOW_DIS) + dd->rcvctrl &= ~SYM_MASK(RcvCtrl, TidFlowEnable); + if (op & QIB_RCVCTRL_TAILUPD_ENB) + dd->rcvctrl |= SYM_MASK(RcvCtrl, TailUpd); + if (op & QIB_RCVCTRL_TAILUPD_DIS) + dd->rcvctrl &= ~SYM_MASK(RcvCtrl, TailUpd); + if (op & QIB_RCVCTRL_PKEY_ENB) + ppd->p_rcvctrl &= ~SYM_MASK(RcvCtrl_0, RcvPartitionKeyDisable); + if (op & QIB_RCVCTRL_PKEY_DIS) + ppd->p_rcvctrl |= SYM_MASK(RcvCtrl_0, RcvPartitionKeyDisable); + if (ctxt < 0) { + mask = (1ULL << dd->ctxtcnt) - 1; + rcd = NULL; + } else { + mask = (1ULL << ctxt); + rcd = dd->rcd[ctxt]; + } + if ((op & QIB_RCVCTRL_CTXT_ENB) && rcd) { + ppd->p_rcvctrl |= + (mask << SYM_LSB(RcvCtrl_0, ContextEnableKernel)); + if (!(dd->flags & QIB_NODMA_RTAIL)) { + op |= QIB_RCVCTRL_TAILUPD_ENB; /* need reg write */ + dd->rcvctrl |= SYM_MASK(RcvCtrl, TailUpd); + } + /* Write these registers before the context is enabled. */ + qib_write_kreg_ctxt(dd, krc_rcvhdrtailaddr, ctxt, + rcd->rcvhdrqtailaddr_phys); + qib_write_kreg_ctxt(dd, krc_rcvhdraddr, ctxt, + rcd->rcvhdrq_phys); + rcd->seq_cnt = 1; + } + if (op & QIB_RCVCTRL_CTXT_DIS) + ppd->p_rcvctrl &= + ~(mask << SYM_LSB(RcvCtrl_0, ContextEnableKernel)); + if (op & QIB_RCVCTRL_BP_ENB) + dd->rcvctrl |= mask << SYM_LSB(RcvCtrl, dontDropRHQFull); + if (op & QIB_RCVCTRL_BP_DIS) + dd->rcvctrl &= ~(mask << SYM_LSB(RcvCtrl, dontDropRHQFull)); + if (op & QIB_RCVCTRL_INTRAVAIL_ENB) + dd->rcvctrl |= (mask << SYM_LSB(RcvCtrl, IntrAvail)); + if (op & QIB_RCVCTRL_INTRAVAIL_DIS) + dd->rcvctrl &= ~(mask << SYM_LSB(RcvCtrl, IntrAvail)); + /* + * Decide which registers to write depending on the ops enabled. + * Special case is "flush" (no bits set at all) + * which needs to write both. + */ + if (op == 0 || (op & RCVCTRL_COMMON_MODS)) + qib_write_kreg(dd, kr_rcvctrl, dd->rcvctrl); + if (op == 0 || (op & RCVCTRL_PORT_MODS)) + qib_write_kreg_port(ppd, krp_rcvctrl, ppd->p_rcvctrl); + if ((op & QIB_RCVCTRL_CTXT_ENB) && dd->rcd[ctxt]) { + /* + * Init the context registers also; if we were + * disabled, tail and head should both be zero + * already from the enable, but since we don't + * know, we have to do it explicitly. + */ + val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt); + qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt); + + /* be sure enabling write seen; hd/tl should be 0 */ + (void) qib_read_kreg32(dd, kr_scratch); + val = qib_read_ureg32(dd, ur_rcvhdrtail, ctxt); + dd->rcd[ctxt]->head = val; + /* If kctxt, interrupt on next receive. */ + if (ctxt < dd->first_user_ctxt) + val |= dd->rhdrhead_intr_off; + qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt); + } else if ((op & QIB_RCVCTRL_INTRAVAIL_ENB) && + dd->rcd[ctxt] && dd->rhdrhead_intr_off) { + /* arm rcv interrupt */ + val = dd->rcd[ctxt]->head | dd->rhdrhead_intr_off; + qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt); + } + if (op & QIB_RCVCTRL_CTXT_DIS) { + unsigned f; + + /* Now that the context is disabled, clear these registers. */ + if (ctxt >= 0) { + qib_write_kreg_ctxt(dd, krc_rcvhdrtailaddr, ctxt, 0); + qib_write_kreg_ctxt(dd, krc_rcvhdraddr, ctxt, 0); + for (f = 0; f < NUM_TIDFLOWS_CTXT; f++) + qib_write_ureg(dd, ur_rcvflowtable + f, + TIDFLOW_ERRBITS, ctxt); + } else { + unsigned i; + + for (i = 0; i < dd->cfgctxts; i++) { + qib_write_kreg_ctxt(dd, krc_rcvhdrtailaddr, + i, 0); + qib_write_kreg_ctxt(dd, krc_rcvhdraddr, i, 0); + for (f = 0; f < NUM_TIDFLOWS_CTXT; f++) + qib_write_ureg(dd, ur_rcvflowtable + f, + TIDFLOW_ERRBITS, i); + } + } + } + spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags); +} + +/* + * Modify the SENDCTRL register in chip-specific way. This + * is a function where there are multiple such registers with + * slightly different layouts. + * The chip doesn't allow back-to-back sendctrl writes, so write + * the scratch register after writing sendctrl. + * + * Which register is written depends on the operation. + * Most operate on the common register, while + * SEND_ENB and SEND_DIS operate on the per-port ones. + * SEND_ENB is included in common because it can change SPCL_TRIG + */ +#define SENDCTRL_COMMON_MODS (\ + QIB_SENDCTRL_CLEAR | \ + QIB_SENDCTRL_AVAIL_DIS | \ + QIB_SENDCTRL_AVAIL_ENB | \ + QIB_SENDCTRL_AVAIL_BLIP | \ + QIB_SENDCTRL_DISARM | \ + QIB_SENDCTRL_DISARM_ALL | \ + QIB_SENDCTRL_SEND_ENB) + +#define SENDCTRL_PORT_MODS (\ + QIB_SENDCTRL_CLEAR | \ + QIB_SENDCTRL_SEND_ENB | \ + QIB_SENDCTRL_SEND_DIS | \ + QIB_SENDCTRL_FLUSH) + +static void sendctrl_7322_mod(struct qib_pportdata *ppd, u32 op) +{ + struct qib_devdata *dd = ppd->dd; + u64 tmp_dd_sendctrl; + unsigned long flags; + + spin_lock_irqsave(&dd->sendctrl_lock, flags); + + /* First the dd ones that are "sticky", saved in shadow */ + if (op & QIB_SENDCTRL_CLEAR) + dd->sendctrl = 0; + if (op & QIB_SENDCTRL_AVAIL_DIS) + dd->sendctrl &= ~SYM_MASK(SendCtrl, SendBufAvailUpd); + else if (op & QIB_SENDCTRL_AVAIL_ENB) { + dd->sendctrl |= SYM_MASK(SendCtrl, SendBufAvailUpd); + if (dd->flags & QIB_USE_SPCL_TRIG) + dd->sendctrl |= SYM_MASK(SendCtrl, SpecialTriggerEn); + } + + /* Then the ppd ones that are "sticky", saved in shadow */ + if (op & QIB_SENDCTRL_SEND_DIS) + ppd->p_sendctrl &= ~SYM_MASK(SendCtrl_0, SendEnable); + else if (op & QIB_SENDCTRL_SEND_ENB) + ppd->p_sendctrl |= SYM_MASK(SendCtrl_0, SendEnable); + + if (op & QIB_SENDCTRL_DISARM_ALL) { + u32 i, last; + + tmp_dd_sendctrl = dd->sendctrl; + last = dd->piobcnt2k + dd->piobcnt4k + NUM_VL15_BUFS; + /* + * Disarm any buffers that are not yet launched, + * disabling updates until done. + */ + tmp_dd_sendctrl &= ~SYM_MASK(SendCtrl, SendBufAvailUpd); + for (i = 0; i < last; i++) { + qib_write_kreg(dd, kr_sendctrl, + tmp_dd_sendctrl | + SYM_MASK(SendCtrl, Disarm) | i); + qib_write_kreg(dd, kr_scratch, 0); + } + } + + if (op & QIB_SENDCTRL_FLUSH) { + u64 tmp_ppd_sendctrl = ppd->p_sendctrl; + + /* + * Now drain all the fifos. The Abort bit should never be + * needed, so for now, at least, we don't use it. + */ + tmp_ppd_sendctrl |= + SYM_MASK(SendCtrl_0, TxeDrainRmFifo) | + SYM_MASK(SendCtrl_0, TxeDrainLaFifo) | + SYM_MASK(SendCtrl_0, TxeBypassIbc); + qib_write_kreg_port(ppd, krp_sendctrl, tmp_ppd_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + tmp_dd_sendctrl = dd->sendctrl; + + if (op & QIB_SENDCTRL_DISARM) + tmp_dd_sendctrl |= SYM_MASK(SendCtrl, Disarm) | + ((op & QIB_7322_SendCtrl_DisarmSendBuf_RMASK) << + SYM_LSB(SendCtrl, DisarmSendBuf)); + if ((op & QIB_SENDCTRL_AVAIL_BLIP) && + (dd->sendctrl & SYM_MASK(SendCtrl, SendBufAvailUpd))) + tmp_dd_sendctrl &= ~SYM_MASK(SendCtrl, SendBufAvailUpd); + + if (op == 0 || (op & SENDCTRL_COMMON_MODS)) { + qib_write_kreg(dd, kr_sendctrl, tmp_dd_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + if (op == 0 || (op & SENDCTRL_PORT_MODS)) { + qib_write_kreg_port(ppd, krp_sendctrl, ppd->p_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + if (op & QIB_SENDCTRL_AVAIL_BLIP) { + qib_write_kreg(dd, kr_sendctrl, dd->sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + + if (op & QIB_SENDCTRL_FLUSH) { + u32 v; + /* + * ensure writes have hit chip, then do a few + * more reads, to allow DMA of pioavail registers + * to occur, so in-memory copy is in sync with + * the chip. Not always safe to sleep. + */ + v = qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_scratch, v); + v = qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_scratch, v); + qib_read_kreg32(dd, kr_scratch); + } +} + +#define _PORT_VIRT_FLAG 0x8000U /* "virtual", need adjustments */ +#define _PORT_64BIT_FLAG 0x10000U /* not "virtual", but 64bit */ +#define _PORT_CNTR_IDXMASK 0x7fffU /* mask off flags above */ + +/** + * qib_portcntr_7322 - read a per-port chip counter + * @ppd: the qlogic_ib pport + * @creg: the counter to read (not a chip offset) + */ +static u64 qib_portcntr_7322(struct qib_pportdata *ppd, u32 reg) +{ + struct qib_devdata *dd = ppd->dd; + u64 ret = 0ULL; + u16 creg; + /* 0xffff for unimplemented or synthesized counters */ + static const u32 xlator[] = { + [QIBPORTCNTR_PKTSEND] = crp_pktsend | _PORT_64BIT_FLAG, + [QIBPORTCNTR_WORDSEND] = crp_wordsend | _PORT_64BIT_FLAG, + [QIBPORTCNTR_PSXMITDATA] = crp_psxmitdatacount, + [QIBPORTCNTR_PSXMITPKTS] = crp_psxmitpktscount, + [QIBPORTCNTR_PSXMITWAIT] = crp_psxmitwaitcount, + [QIBPORTCNTR_SENDSTALL] = crp_sendstall, + [QIBPORTCNTR_PKTRCV] = crp_pktrcv | _PORT_64BIT_FLAG, + [QIBPORTCNTR_PSRCVDATA] = crp_psrcvdatacount, + [QIBPORTCNTR_PSRCVPKTS] = crp_psrcvpktscount, + [QIBPORTCNTR_RCVEBP] = crp_rcvebp, + [QIBPORTCNTR_RCVOVFL] = crp_rcvovfl, + [QIBPORTCNTR_WORDRCV] = crp_wordrcv | _PORT_64BIT_FLAG, + [QIBPORTCNTR_RXDROPPKT] = 0xffff, /* not needed for 7322 */ + [QIBPORTCNTR_RXLOCALPHYERR] = crp_rxotherlocalphyerr, + [QIBPORTCNTR_RXVLERR] = crp_rxvlerr, + [QIBPORTCNTR_ERRICRC] = crp_erricrc, + [QIBPORTCNTR_ERRVCRC] = crp_errvcrc, + [QIBPORTCNTR_ERRLPCRC] = crp_errlpcrc, + [QIBPORTCNTR_BADFORMAT] = crp_badformat, + [QIBPORTCNTR_ERR_RLEN] = crp_err_rlen, + [QIBPORTCNTR_IBSYMBOLERR] = crp_ibsymbolerr, + [QIBPORTCNTR_INVALIDRLEN] = crp_invalidrlen, + [QIBPORTCNTR_UNSUPVL] = crp_txunsupvl, + [QIBPORTCNTR_EXCESSBUFOVFL] = crp_excessbufferovfl, + [QIBPORTCNTR_ERRLINK] = crp_errlink, + [QIBPORTCNTR_IBLINKDOWN] = crp_iblinkdown, + [QIBPORTCNTR_IBLINKERRRECOV] = crp_iblinkerrrecov, + [QIBPORTCNTR_LLI] = crp_locallinkintegrityerr, + [QIBPORTCNTR_VL15PKTDROP] = crp_vl15droppedpkt, + [QIBPORTCNTR_ERRPKEY] = crp_errpkey, + /* + * the next 3 aren't really counters, but were implemented + * as counters in older chips, so still get accessed as + * though they were counters from this code. + */ + [QIBPORTCNTR_PSINTERVAL] = krp_psinterval, + [QIBPORTCNTR_PSSTART] = krp_psstart, + [QIBPORTCNTR_PSSTAT] = krp_psstat, + /* pseudo-counter, summed for all ports */ + [QIBPORTCNTR_KHDROVFL] = 0xffff, + }; + + if (reg >= ARRAY_SIZE(xlator)) { + qib_devinfo(ppd->dd->pcidev, + "Unimplemented portcounter %u\n", reg); + goto done; + } + creg = xlator[reg] & _PORT_CNTR_IDXMASK; + + /* handle non-counters and special cases first */ + if (reg == QIBPORTCNTR_KHDROVFL) { + int i; + + /* sum over all kernel contexts (skip if mini_init) */ + for (i = 0; dd->rcd && i < dd->first_user_ctxt; i++) { + struct qib_ctxtdata *rcd = dd->rcd[i]; + + if (!rcd || rcd->ppd != ppd) + continue; + ret += read_7322_creg32(dd, cr_base_egrovfl + i); + } + goto done; + } else if (reg == QIBPORTCNTR_RXDROPPKT) { + /* + * Used as part of the synthesis of port_rcv_errors + * in the verbs code for IBTA counters. Not needed for 7322, + * because all the errors are already counted by other cntrs. + */ + goto done; + } else if (reg == QIBPORTCNTR_PSINTERVAL || + reg == QIBPORTCNTR_PSSTART || reg == QIBPORTCNTR_PSSTAT) { + /* were counters in older chips, now per-port kernel regs */ + ret = qib_read_kreg_port(ppd, creg); + goto done; + } + + /* + * Only fast increment counters are 64 bits; use 32 bit reads to + * avoid two independent reads when on Opteron. + */ + if (xlator[reg] & _PORT_64BIT_FLAG) + ret = read_7322_creg_port(ppd, creg); + else + ret = read_7322_creg32_port(ppd, creg); + if (creg == crp_ibsymbolerr) { + if (ppd->cpspec->ibdeltainprog) + ret -= ret - ppd->cpspec->ibsymsnap; + ret -= ppd->cpspec->ibsymdelta; + } else if (creg == crp_iblinkerrrecov) { + if (ppd->cpspec->ibdeltainprog) + ret -= ret - ppd->cpspec->iblnkerrsnap; + ret -= ppd->cpspec->iblnkerrdelta; + } else if (creg == crp_errlink) + ret -= ppd->cpspec->ibmalfdelta; + else if (creg == crp_iblinkdown) + ret += ppd->cpspec->iblnkdowndelta; +done: + return ret; +} + +/* + * Device counter names (not port-specific), one line per stat, + * single string. Used by utilities like ipathstats to print the stats + * in a way which works for different versions of drivers, without changing + * the utility. Names need to be 12 chars or less (w/o newline), for proper + * display by utility. + * Non-error counters are first. + * Start of "error" conters is indicated by a leading "E " on the first + * "error" counter, and doesn't count in label length. + * The EgrOvfl list needs to be last so we truncate them at the configured + * context count for the device. + * cntr7322indices contains the corresponding register indices. + */ +static const char cntr7322names[] = + "Interrupts\n" + "HostBusStall\n" + "E RxTIDFull\n" + "RxTIDInvalid\n" + "RxTIDFloDrop\n" /* 7322 only */ + "Ctxt0EgrOvfl\n" + "Ctxt1EgrOvfl\n" + "Ctxt2EgrOvfl\n" + "Ctxt3EgrOvfl\n" + "Ctxt4EgrOvfl\n" + "Ctxt5EgrOvfl\n" + "Ctxt6EgrOvfl\n" + "Ctxt7EgrOvfl\n" + "Ctxt8EgrOvfl\n" + "Ctxt9EgrOvfl\n" + "Ctx10EgrOvfl\n" + "Ctx11EgrOvfl\n" + "Ctx12EgrOvfl\n" + "Ctx13EgrOvfl\n" + "Ctx14EgrOvfl\n" + "Ctx15EgrOvfl\n" + "Ctx16EgrOvfl\n" + "Ctx17EgrOvfl\n" + ; + +static const u32 cntr7322indices[] = { + cr_lbint | _PORT_64BIT_FLAG, + cr_lbstall | _PORT_64BIT_FLAG, + cr_tidfull, + cr_tidinvalid, + cr_rxtidflowdrop, + cr_base_egrovfl + 0, + cr_base_egrovfl + 1, + cr_base_egrovfl + 2, + cr_base_egrovfl + 3, + cr_base_egrovfl + 4, + cr_base_egrovfl + 5, + cr_base_egrovfl + 6, + cr_base_egrovfl + 7, + cr_base_egrovfl + 8, + cr_base_egrovfl + 9, + cr_base_egrovfl + 10, + cr_base_egrovfl + 11, + cr_base_egrovfl + 12, + cr_base_egrovfl + 13, + cr_base_egrovfl + 14, + cr_base_egrovfl + 15, + cr_base_egrovfl + 16, + cr_base_egrovfl + 17, +}; + +/* + * same as cntr7322names and cntr7322indices, but for port-specific counters. + * portcntr7322indices is somewhat complicated by some registers needing + * adjustments of various kinds, and those are ORed with _PORT_VIRT_FLAG + */ +static const char portcntr7322names[] = + "TxPkt\n" + "TxFlowPkt\n" + "TxWords\n" + "RxPkt\n" + "RxFlowPkt\n" + "RxWords\n" + "TxFlowStall\n" + "TxDmaDesc\n" /* 7220 and 7322-only */ + "E RxDlidFltr\n" /* 7220 and 7322-only */ + "IBStatusChng\n" + "IBLinkDown\n" + "IBLnkRecov\n" + "IBRxLinkErr\n" + "IBSymbolErr\n" + "RxLLIErr\n" + "RxBadFormat\n" + "RxBadLen\n" + "RxBufOvrfl\n" + "RxEBP\n" + "RxFlowCtlErr\n" + "RxICRCerr\n" + "RxLPCRCerr\n" + "RxVCRCerr\n" + "RxInvalLen\n" + "RxInvalPKey\n" + "RxPktDropped\n" + "TxBadLength\n" + "TxDropped\n" + "TxInvalLen\n" + "TxUnderrun\n" + "TxUnsupVL\n" + "RxLclPhyErr\n" /* 7220 and 7322-only from here down */ + "RxVL15Drop\n" + "RxVlErr\n" + "XcessBufOvfl\n" + "RxQPBadCtxt\n" /* 7322-only from here down */ + "TXBadHeader\n" + ; + +static const u32 portcntr7322indices[] = { + QIBPORTCNTR_PKTSEND | _PORT_VIRT_FLAG, + crp_pktsendflow, + QIBPORTCNTR_WORDSEND | _PORT_VIRT_FLAG, + QIBPORTCNTR_PKTRCV | _PORT_VIRT_FLAG, + crp_pktrcvflowctrl, + QIBPORTCNTR_WORDRCV | _PORT_VIRT_FLAG, + QIBPORTCNTR_SENDSTALL | _PORT_VIRT_FLAG, + crp_txsdmadesc | _PORT_64BIT_FLAG, + crp_rxdlidfltr, + crp_ibstatuschange, + QIBPORTCNTR_IBLINKDOWN | _PORT_VIRT_FLAG, + QIBPORTCNTR_IBLINKERRRECOV | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRLINK | _PORT_VIRT_FLAG, + QIBPORTCNTR_IBSYMBOLERR | _PORT_VIRT_FLAG, + QIBPORTCNTR_LLI | _PORT_VIRT_FLAG, + QIBPORTCNTR_BADFORMAT | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERR_RLEN | _PORT_VIRT_FLAG, + QIBPORTCNTR_RCVOVFL | _PORT_VIRT_FLAG, + QIBPORTCNTR_RCVEBP | _PORT_VIRT_FLAG, + crp_rcvflowctrlviol, + QIBPORTCNTR_ERRICRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRLPCRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRVCRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_INVALIDRLEN | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRPKEY | _PORT_VIRT_FLAG, + QIBPORTCNTR_RXDROPPKT | _PORT_VIRT_FLAG, + crp_txminmaxlenerr, + crp_txdroppedpkt, + crp_txlenerr, + crp_txunderrun, + crp_txunsupvl, + QIBPORTCNTR_RXLOCALPHYERR | _PORT_VIRT_FLAG, + QIBPORTCNTR_VL15PKTDROP | _PORT_VIRT_FLAG, + QIBPORTCNTR_RXVLERR | _PORT_VIRT_FLAG, + QIBPORTCNTR_EXCESSBUFOVFL | _PORT_VIRT_FLAG, + crp_rxqpinvalidctxt, + crp_txhdrerr, +}; + +/* do all the setup to make the counter reads efficient later */ +static void init_7322_cntrnames(struct qib_devdata *dd) +{ + int i, j = 0; + char *s; + + for (i = 0, s = (char *)cntr7322names; s && j <= dd->cfgctxts; + i++) { + /* we always have at least one counter before the egrovfl */ + if (!j && !strncmp("Ctxt0EgrOvfl", s + 1, 12)) + j = 1; + s = strchr(s + 1, '\n'); + if (s && j) + j++; + } + dd->cspec->ncntrs = i; + if (!s) + /* full list; size is without terminating null */ + dd->cspec->cntrnamelen = sizeof(cntr7322names) - 1; + else + dd->cspec->cntrnamelen = 1 + s - cntr7322names; + dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs + * sizeof(u64), GFP_KERNEL); + if (!dd->cspec->cntrs) + qib_dev_err(dd, "Failed allocation for counters\n"); + + for (i = 0, s = (char *)portcntr7322names; s; i++) + s = strchr(s + 1, '\n'); + dd->cspec->nportcntrs = i - 1; + dd->cspec->portcntrnamelen = sizeof(portcntr7322names) - 1; + for (i = 0; i < dd->num_pports; ++i) { + dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs + * sizeof(u64), GFP_KERNEL); + if (!dd->pport[i].cpspec->portcntrs) + qib_dev_err(dd, "Failed allocation for" + " portcounters\n"); + } +} + +static u32 qib_read_7322cntrs(struct qib_devdata *dd, loff_t pos, char **namep, + u64 **cntrp) +{ + u32 ret; + + if (namep) { + ret = dd->cspec->cntrnamelen; + if (pos >= ret) + ret = 0; /* final read after getting everything */ + else + *namep = (char *) cntr7322names; + } else { + u64 *cntr = dd->cspec->cntrs; + int i; + + ret = dd->cspec->ncntrs * sizeof(u64); + if (!cntr || pos >= ret) { + /* everything read, or couldn't get memory */ + ret = 0; + goto done; + } + *cntrp = cntr; + for (i = 0; i < dd->cspec->ncntrs; i++) + if (cntr7322indices[i] & _PORT_64BIT_FLAG) + *cntr++ = read_7322_creg(dd, + cntr7322indices[i] & + _PORT_CNTR_IDXMASK); + else + *cntr++ = read_7322_creg32(dd, + cntr7322indices[i]); + } +done: + return ret; +} + +static u32 qib_read_7322portcntrs(struct qib_devdata *dd, loff_t pos, u32 port, + char **namep, u64 **cntrp) +{ + u32 ret; + + if (namep) { + ret = dd->cspec->portcntrnamelen; + if (pos >= ret) + ret = 0; /* final read after getting everything */ + else + *namep = (char *)portcntr7322names; + } else { + struct qib_pportdata *ppd = &dd->pport[port]; + u64 *cntr = ppd->cpspec->portcntrs; + int i; + + ret = dd->cspec->nportcntrs * sizeof(u64); + if (!cntr || pos >= ret) { + /* everything read, or couldn't get memory */ + ret = 0; + goto done; + } + *cntrp = cntr; + for (i = 0; i < dd->cspec->nportcntrs; i++) { + if (portcntr7322indices[i] & _PORT_VIRT_FLAG) + *cntr++ = qib_portcntr_7322(ppd, + portcntr7322indices[i] & + _PORT_CNTR_IDXMASK); + else if (portcntr7322indices[i] & _PORT_64BIT_FLAG) + *cntr++ = read_7322_creg_port(ppd, + portcntr7322indices[i] & + _PORT_CNTR_IDXMASK); + else + *cntr++ = read_7322_creg32_port(ppd, + portcntr7322indices[i]); + } + } +done: + return ret; +} + +/** + * qib_get_7322_faststats - get word counters from chip before they overflow + * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * + * VESTIGIAL IBA7322 has no "small fast counters", so the only + * real purpose of this function is to maintain the notion of + * "active time", which in turn is only logged into the eeprom, + * which we don;t have, yet, for 7322-based boards. + * + * called from add_timer + */ +static void qib_get_7322_faststats(unsigned long opaque) +{ + struct qib_devdata *dd = (struct qib_devdata *) opaque; + struct qib_pportdata *ppd; + unsigned long flags; + u64 traffic_wds; + int pidx; + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + + /* + * If port isn't enabled or not operational ports, or + * diags is running (can cause memory diags to fail) + * skip this port this time. + */ + if (!ppd->link_speed_supported || !(dd->flags & QIB_INITTED) + || dd->diag_client) + continue; + + /* + * Maintain an activity timer, based on traffic + * exceeding a threshold, so we need to check the word-counts + * even if they are 64-bit. + */ + traffic_wds = qib_portcntr_7322(ppd, QIBPORTCNTR_WORDRCV) + + qib_portcntr_7322(ppd, QIBPORTCNTR_WORDSEND); + spin_lock_irqsave(&ppd->dd->eep_st_lock, flags); + traffic_wds -= ppd->dd->traffic_wds; + ppd->dd->traffic_wds += traffic_wds; + if (traffic_wds >= QIB_TRAFFIC_ACTIVE_THRESHOLD) + atomic_add(ACTIVITY_TIMER, &ppd->dd->active_time); + spin_unlock_irqrestore(&ppd->dd->eep_st_lock, flags); + if (ppd->cpspec->qdr_dfe_on && (ppd->link_speed_active & + QIB_IB_QDR) && + (ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED | + QIBL_LINKACTIVE)) && + ppd->cpspec->qdr_dfe_time && + time_is_before_jiffies(ppd->cpspec->qdr_dfe_time)) { + ppd->cpspec->qdr_dfe_on = 0; + + qib_write_kreg_port(ppd, krp_static_adapt_dis(2), + ppd->dd->cspec->r1 ? + QDR_STATIC_ADAPT_INIT_R1 : + QDR_STATIC_ADAPT_INIT); + force_h1(ppd); + } + } + mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER); +} + +/* + * If we were using MSIx, try to fallback to INTx. + */ +static int qib_7322_intr_fallback(struct qib_devdata *dd) +{ + if (!dd->cspec->num_msix_entries) + return 0; /* already using INTx */ + + qib_devinfo(dd->pcidev, "MSIx interrupt not detected," + " trying INTx interrupts\n"); + qib_7322_nomsix(dd); + qib_enable_intx(dd->pcidev); + qib_setup_7322_interrupt(dd, 0); + return 1; +} + +/* + * Reset the XGXS (between serdes and IBC). Slightly less intrusive + * than resetting the IBC or external link state, and useful in some + * cases to cause some retraining. To do this right, we reset IBC + * as well, then return to previous state (which may be still in reset) + * NOTE: some callers of this "know" this writes the current value + * of cpspec->ibcctrl_a as part of it's operation, so if that changes, + * check all callers. + */ +static void qib_7322_mini_pcs_reset(struct qib_pportdata *ppd) +{ + u64 val; + struct qib_devdata *dd = ppd->dd; + const u64 reset_bits = SYM_MASK(IBPCSConfig_0, xcv_rreset) | + SYM_MASK(IBPCSConfig_0, xcv_treset) | + SYM_MASK(IBPCSConfig_0, tx_rx_reset); + + val = qib_read_kreg_port(ppd, krp_ib_pcsconfig); + qib_write_kreg(dd, kr_hwerrmask, + dd->cspec->hwerrmask & ~HWE_MASK(statusValidNoEop)); + qib_write_kreg_port(ppd, krp_ibcctrl_a, + ppd->cpspec->ibcctrl_a & + ~SYM_MASK(IBCCtrlA_0, IBLinkEn)); + + qib_write_kreg_port(ppd, krp_ib_pcsconfig, val | reset_bits); + qib_read_kreg32(dd, kr_scratch); + qib_write_kreg_port(ppd, krp_ib_pcsconfig, val & ~reset_bits); + qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a); + qib_write_kreg(dd, kr_scratch, 0ULL); + qib_write_kreg(dd, kr_hwerrclear, + SYM_MASK(HwErrClear, statusValidNoEopClear)); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); +} + +/* + * This code for non-IBTA-compliant IB speed negotiation is only known to + * work for the SDR to DDR transition, and only between an HCA and a switch + * with recent firmware. It is based on observed heuristics, rather than + * actual knowledge of the non-compliant speed negotiation. + * It has a number of hard-coded fields, since the hope is to rewrite this + * when a spec is available on how the negoation is intended to work. + */ +static void autoneg_7322_sendpkt(struct qib_pportdata *ppd, u32 *hdr, + u32 dcnt, u32 *data) +{ + int i; + u64 pbc; + u32 __iomem *piobuf; + u32 pnum, control, len; + struct qib_devdata *dd = ppd->dd; + + i = 0; + len = 7 + dcnt + 1; /* 7 dword header, dword data, icrc */ + control = qib_7322_setpbc_control(ppd, len, 0, 15); + pbc = ((u64) control << 32) | len; + while (!(piobuf = qib_7322_getsendbuf(ppd, pbc, &pnum))) { + if (i++ > 15) + return; + udelay(2); + } + /* disable header check on this packet, since it can't be valid */ + dd->f_txchk_change(dd, pnum, 1, TXCHK_CHG_TYPE_DIS1, NULL); + writeq(pbc, piobuf); + qib_flush_wc(); + qib_pio_copy(piobuf + 2, hdr, 7); + qib_pio_copy(piobuf + 9, data, dcnt); + if (dd->flags & QIB_USE_SPCL_TRIG) { + u32 spcl_off = (pnum >= dd->piobcnt2k) ? 2047 : 1023; + + qib_flush_wc(); + __raw_writel(0xaebecede, piobuf + spcl_off); + } + qib_flush_wc(); + qib_sendbuf_done(dd, pnum); + /* and re-enable hdr check */ + dd->f_txchk_change(dd, pnum, 1, TXCHK_CHG_TYPE_ENAB1, NULL); +} + +/* + * _start packet gets sent twice at start, _done gets sent twice at end + */ +static void qib_autoneg_7322_send(struct qib_pportdata *ppd, int which) +{ + struct qib_devdata *dd = ppd->dd; + static u32 swapped; + u32 dw, i, hcnt, dcnt, *data; + static u32 hdr[7] = { 0xf002ffff, 0x48ffff, 0x6400abba }; + static u32 madpayload_start[0x40] = { + 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0, + 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x1, 0x1388, 0x15e, 0x1, /* rest 0's */ + }; + static u32 madpayload_done[0x40] = { + 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0, + 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x40000001, 0x1388, 0x15e, /* rest 0's */ + }; + + dcnt = ARRAY_SIZE(madpayload_start); + hcnt = ARRAY_SIZE(hdr); + if (!swapped) { + /* for maintainability, do it at runtime */ + for (i = 0; i < hcnt; i++) { + dw = (__force u32) cpu_to_be32(hdr[i]); + hdr[i] = dw; + } + for (i = 0; i < dcnt; i++) { + dw = (__force u32) cpu_to_be32(madpayload_start[i]); + madpayload_start[i] = dw; + dw = (__force u32) cpu_to_be32(madpayload_done[i]); + madpayload_done[i] = dw; + } + swapped = 1; + } + + data = which ? madpayload_done : madpayload_start; + + autoneg_7322_sendpkt(ppd, hdr, dcnt, data); + qib_read_kreg64(dd, kr_scratch); + udelay(2); + autoneg_7322_sendpkt(ppd, hdr, dcnt, data); + qib_read_kreg64(dd, kr_scratch); + udelay(2); +} + +/* + * Do the absolute minimum to cause an IB speed change, and make it + * ready, but don't actually trigger the change. The caller will + * do that when ready (if link is in Polling training state, it will + * happen immediately, otherwise when link next goes down) + * + * This routine should only be used as part of the DDR autonegotation + * code for devices that are not compliant with IB 1.2 (or code that + * fixes things up for same). + * + * When link has gone down, and autoneg enabled, or autoneg has + * failed and we give up until next time we set both speeds, and + * then we want IBTA enabled as well as "use max enabled speed. + */ +static void set_7322_ibspeed_fast(struct qib_pportdata *ppd, u32 speed) +{ + u64 newctrlb; + newctrlb = ppd->cpspec->ibcctrl_b & ~(IBA7322_IBC_SPEED_MASK | + IBA7322_IBC_IBTA_1_2_MASK | + IBA7322_IBC_MAX_SPEED_MASK); + + if (speed & (speed - 1)) /* multiple speeds */ + newctrlb |= (speed << IBA7322_IBC_SPEED_LSB) | + IBA7322_IBC_IBTA_1_2_MASK | + IBA7322_IBC_MAX_SPEED_MASK; + else + newctrlb |= speed == QIB_IB_QDR ? + IBA7322_IBC_SPEED_QDR | IBA7322_IBC_IBTA_1_2_MASK : + ((speed == QIB_IB_DDR ? + IBA7322_IBC_SPEED_DDR : IBA7322_IBC_SPEED_SDR)); + + if (newctrlb == ppd->cpspec->ibcctrl_b) + return; + + ppd->cpspec->ibcctrl_b = newctrlb; + qib_write_kreg_port(ppd, krp_ibcctrl_b, ppd->cpspec->ibcctrl_b); + qib_write_kreg(ppd->dd, kr_scratch, 0); +} + +/* + * This routine is only used when we are not talking to another + * IB 1.2-compliant device that we think can do DDR. + * (This includes all existing switch chips as of Oct 2007.) + * 1.2-compliant devices go directly to DDR prior to reaching INIT + */ +static void try_7322_autoneg(struct qib_pportdata *ppd) +{ + unsigned long flags; + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_AUTONEG_INPROG; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + qib_autoneg_7322_send(ppd, 0); + set_7322_ibspeed_fast(ppd, QIB_IB_DDR); + qib_7322_mini_pcs_reset(ppd); + /* 2 msec is minimum length of a poll cycle */ + queue_delayed_work(ib_wq, &ppd->cpspec->autoneg_work, + msecs_to_jiffies(2)); +} + +/* + * Handle the empirically determined mechanism for auto-negotiation + * of DDR speed with switches. + */ +static void autoneg_7322_work(struct work_struct *work) +{ + struct qib_pportdata *ppd; + struct qib_devdata *dd; + u64 startms; + u32 i; + unsigned long flags; + + ppd = container_of(work, struct qib_chippport_specific, + autoneg_work.work)->ppd; + dd = ppd->dd; + + startms = jiffies_to_msecs(jiffies); + + /* + * Busy wait for this first part, it should be at most a + * few hundred usec, since we scheduled ourselves for 2msec. + */ + for (i = 0; i < 25; i++) { + if (SYM_FIELD(ppd->lastibcstat, IBCStatusA_0, LinkState) + == IB_7322_LT_STATE_POLLQUIET) { + qib_set_linkstate(ppd, QIB_IB_LINKDOWN_DISABLE); + break; + } + udelay(100); + } + + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) + goto done; /* we got there early or told to stop */ + + /* we expect this to timeout */ + if (wait_event_timeout(ppd->cpspec->autoneg_wait, + !(ppd->lflags & QIBL_IB_AUTONEG_INPROG), + msecs_to_jiffies(90))) + goto done; + qib_7322_mini_pcs_reset(ppd); + + /* we expect this to timeout */ + if (wait_event_timeout(ppd->cpspec->autoneg_wait, + !(ppd->lflags & QIBL_IB_AUTONEG_INPROG), + msecs_to_jiffies(1700))) + goto done; + qib_7322_mini_pcs_reset(ppd); + + set_7322_ibspeed_fast(ppd, QIB_IB_SDR); + + /* + * Wait up to 250 msec for link to train and get to INIT at DDR; + * this should terminate early. + */ + wait_event_timeout(ppd->cpspec->autoneg_wait, + !(ppd->lflags & QIBL_IB_AUTONEG_INPROG), + msecs_to_jiffies(250)); +done: + if (ppd->lflags & QIBL_IB_AUTONEG_INPROG) { + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG; + if (ppd->cpspec->autoneg_tries == AUTONEG_TRIES) { + ppd->lflags |= QIBL_IB_AUTONEG_FAILED; + ppd->cpspec->autoneg_tries = 0; + } + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled); + } +} + +/* + * This routine is used to request IPG set in the QLogic switch. + * Only called if r1. + */ +static void try_7322_ipg(struct qib_pportdata *ppd) +{ + struct qib_ibport *ibp = &ppd->ibport_data; + struct ib_mad_send_buf *send_buf; + struct ib_mad_agent *agent; + struct ib_smp *smp; + unsigned delay; + int ret; + + agent = ibp->send_agent; + if (!agent) + goto retry; + + send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR, + IB_MGMT_MAD_DATA, GFP_ATOMIC); + if (IS_ERR(send_buf)) + goto retry; + + if (!ibp->smi_ah) { + struct ib_ah_attr attr; + struct ib_ah *ah; + + memset(&attr, 0, sizeof attr); + attr.dlid = be16_to_cpu(IB_LID_PERMISSIVE); + attr.port_num = ppd->port; + ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr); + if (IS_ERR(ah)) + ret = -EINVAL; + else { + send_buf->ah = ah; + ibp->smi_ah = to_iah(ah); + ret = 0; + } + } else { + send_buf->ah = &ibp->smi_ah->ibah; + ret = 0; + } + + smp = send_buf->mad; + smp->base_version = IB_MGMT_BASE_VERSION; + smp->mgmt_class = IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE; + smp->class_version = 1; + smp->method = IB_MGMT_METHOD_SEND; + smp->hop_cnt = 1; + smp->attr_id = QIB_VENDOR_IPG; + smp->attr_mod = 0; + + if (!ret) + ret = ib_post_send_mad(send_buf, NULL); + if (ret) + ib_free_send_mad(send_buf); +retry: + delay = 2 << ppd->cpspec->ipg_tries; + queue_delayed_work(ib_wq, &ppd->cpspec->ipg_work, + msecs_to_jiffies(delay)); +} + +/* + * Timeout handler for setting IPG. + * Only called if r1. + */ +static void ipg_7322_work(struct work_struct *work) +{ + struct qib_pportdata *ppd; + + ppd = container_of(work, struct qib_chippport_specific, + ipg_work.work)->ppd; + if ((ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED | QIBL_LINKACTIVE)) + && ++ppd->cpspec->ipg_tries <= 10) + try_7322_ipg(ppd); +} + +static u32 qib_7322_iblink_state(u64 ibcs) +{ + u32 state = (u32)SYM_FIELD(ibcs, IBCStatusA_0, LinkState); + + switch (state) { + case IB_7322_L_STATE_INIT: + state = IB_PORT_INIT; + break; + case IB_7322_L_STATE_ARM: + state = IB_PORT_ARMED; + break; + case IB_7322_L_STATE_ACTIVE: + /* fall through */ + case IB_7322_L_STATE_ACT_DEFER: + state = IB_PORT_ACTIVE; + break; + default: /* fall through */ + case IB_7322_L_STATE_DOWN: + state = IB_PORT_DOWN; + break; + } + return state; +} + +/* returns the IBTA port state, rather than the IBC link training state */ +static u8 qib_7322_phys_portstate(u64 ibcs) +{ + u8 state = (u8)SYM_FIELD(ibcs, IBCStatusA_0, LinkTrainingState); + return qib_7322_physportstate[state]; +} + +static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs) +{ + int ret = 0, symadj = 0; + unsigned long flags; + int mult; + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_FORCE_NOTIFY; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + + /* Update our picture of width and speed from chip */ + if (ibcs & SYM_MASK(IBCStatusA_0, LinkSpeedQDR)) { + ppd->link_speed_active = QIB_IB_QDR; + mult = 4; + } else if (ibcs & SYM_MASK(IBCStatusA_0, LinkSpeedActive)) { + ppd->link_speed_active = QIB_IB_DDR; + mult = 2; + } else { + ppd->link_speed_active = QIB_IB_SDR; + mult = 1; + } + if (ibcs & SYM_MASK(IBCStatusA_0, LinkWidthActive)) { + ppd->link_width_active = IB_WIDTH_4X; + mult *= 4; + } else + ppd->link_width_active = IB_WIDTH_1X; + ppd->delay_mult = ib_rate_to_delay[mult_to_ib_rate(mult)]; + + if (!ibup) { + u64 clr; + + /* Link went down. */ + /* do IPG MAD again after linkdown, even if last time failed */ + ppd->cpspec->ipg_tries = 0; + clr = qib_read_kreg_port(ppd, krp_ibcstatus_b) & + (SYM_MASK(IBCStatusB_0, heartbeat_timed_out) | + SYM_MASK(IBCStatusB_0, heartbeat_crosstalk)); + if (clr) + qib_write_kreg_port(ppd, krp_ibcstatus_b, clr); + if (!(ppd->lflags & (QIBL_IB_AUTONEG_FAILED | + QIBL_IB_AUTONEG_INPROG))) + set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled); + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) { + struct qib_qsfp_data *qd = + &ppd->cpspec->qsfp_data; + /* unlock the Tx settings, speed may change */ + qib_write_kreg_port(ppd, krp_tx_deemph_override, + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + reset_tx_deemphasis_override)); + qib_cancel_sends(ppd); + /* on link down, ensure sane pcs state */ + qib_7322_mini_pcs_reset(ppd); + /* schedule the qsfp refresh which should turn the link + off */ + if (ppd->dd->flags & QIB_HAS_QSFP) { + qd->t_insert = jiffies; + queue_work(ib_wq, &qd->work); + } + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (__qib_sdma_running(ppd)) + __qib_sdma_process_event(ppd, + qib_sdma_event_e70_go_idle); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } + clr = read_7322_creg32_port(ppd, crp_iblinkdown); + if (clr == ppd->cpspec->iblnkdownsnap) + ppd->cpspec->iblnkdowndelta++; + } else { + if (qib_compat_ddr_negotiate && + !(ppd->lflags & (QIBL_IB_AUTONEG_FAILED | + QIBL_IB_AUTONEG_INPROG)) && + ppd->link_speed_active == QIB_IB_SDR && + (ppd->link_speed_enabled & QIB_IB_DDR) + && ppd->cpspec->autoneg_tries < AUTONEG_TRIES) { + /* we are SDR, and auto-negotiation enabled */ + ++ppd->cpspec->autoneg_tries; + if (!ppd->cpspec->ibdeltainprog) { + ppd->cpspec->ibdeltainprog = 1; + ppd->cpspec->ibsymdelta += + read_7322_creg32_port(ppd, + crp_ibsymbolerr) - + ppd->cpspec->ibsymsnap; + ppd->cpspec->iblnkerrdelta += + read_7322_creg32_port(ppd, + crp_iblinkerrrecov) - + ppd->cpspec->iblnkerrsnap; + } + try_7322_autoneg(ppd); + ret = 1; /* no other IB status change processing */ + } else if ((ppd->lflags & QIBL_IB_AUTONEG_INPROG) && + ppd->link_speed_active == QIB_IB_SDR) { + qib_autoneg_7322_send(ppd, 1); + set_7322_ibspeed_fast(ppd, QIB_IB_DDR); + qib_7322_mini_pcs_reset(ppd); + udelay(2); + ret = 1; /* no other IB status change processing */ + } else if ((ppd->lflags & QIBL_IB_AUTONEG_INPROG) && + (ppd->link_speed_active & QIB_IB_DDR)) { + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~(QIBL_IB_AUTONEG_INPROG | + QIBL_IB_AUTONEG_FAILED); + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + ppd->cpspec->autoneg_tries = 0; + /* re-enable SDR, for next link down */ + set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled); + wake_up(&ppd->cpspec->autoneg_wait); + symadj = 1; + } else if (ppd->lflags & QIBL_IB_AUTONEG_FAILED) { + /* + * Clear autoneg failure flag, and do setup + * so we'll try next time link goes down and + * back to INIT (possibly connected to a + * different device). + */ + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_AUTONEG_FAILED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + ppd->cpspec->ibcctrl_b |= IBA7322_IBC_IBTA_1_2_MASK; + symadj = 1; + } + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) { + symadj = 1; + if (ppd->dd->cspec->r1 && ppd->cpspec->ipg_tries <= 10) + try_7322_ipg(ppd); + if (!ppd->cpspec->recovery_init) + setup_7322_link_recovery(ppd, 0); + ppd->cpspec->qdr_dfe_time = jiffies + + msecs_to_jiffies(QDR_DFE_DISABLE_DELAY); + } + ppd->cpspec->ibmalfusesnap = 0; + ppd->cpspec->ibmalfsnap = read_7322_creg32_port(ppd, + crp_errlink); + } + if (symadj) { + ppd->cpspec->iblnkdownsnap = + read_7322_creg32_port(ppd, crp_iblinkdown); + if (ppd->cpspec->ibdeltainprog) { + ppd->cpspec->ibdeltainprog = 0; + ppd->cpspec->ibsymdelta += read_7322_creg32_port(ppd, + crp_ibsymbolerr) - ppd->cpspec->ibsymsnap; + ppd->cpspec->iblnkerrdelta += read_7322_creg32_port(ppd, + crp_iblinkerrrecov) - ppd->cpspec->iblnkerrsnap; + } + } else if (!ibup && qib_compat_ddr_negotiate && + !ppd->cpspec->ibdeltainprog && + !(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) { + ppd->cpspec->ibdeltainprog = 1; + ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd, + crp_ibsymbolerr); + ppd->cpspec->iblnkerrsnap = read_7322_creg32_port(ppd, + crp_iblinkerrrecov); + } + + if (!ret) + qib_setup_7322_setextled(ppd, ibup); + return ret; +} + +/* + * Does read/modify/write to appropriate registers to + * set output and direction bits selected by mask. + * these are in their canonical postions (e.g. lsb of + * dir will end up in D48 of extctrl on existing chips). + * returns contents of GP Inputs. + */ +static int gpio_7322_mod(struct qib_devdata *dd, u32 out, u32 dir, u32 mask) +{ + u64 read_val, new_out; + unsigned long flags; + + if (mask) { + /* some bits being written, lock access to GPIO */ + dir &= mask; + out &= mask; + spin_lock_irqsave(&dd->cspec->gpio_lock, flags); + dd->cspec->extctrl &= ~((u64)mask << SYM_LSB(EXTCtrl, GPIOOe)); + dd->cspec->extctrl |= ((u64) dir << SYM_LSB(EXTCtrl, GPIOOe)); + new_out = (dd->cspec->gpio_out & ~mask) | out; + + qib_write_kreg(dd, kr_extctrl, dd->cspec->extctrl); + qib_write_kreg(dd, kr_gpio_out, new_out); + dd->cspec->gpio_out = new_out; + spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); + } + /* + * It is unlikely that a read at this time would get valid + * data on a pin whose direction line was set in the same + * call to this function. We include the read here because + * that allows us to potentially combine a change on one pin with + * a read on another, and because the old code did something like + * this. + */ + read_val = qib_read_kreg64(dd, kr_extstatus); + return SYM_FIELD(read_val, EXTStatus, GPIOIn); +} + +/* Enable writes to config EEPROM, if possible. Returns previous state */ +static int qib_7322_eeprom_wen(struct qib_devdata *dd, int wen) +{ + int prev_wen; + u32 mask; + + mask = 1 << QIB_EEPROM_WEN_NUM; + prev_wen = ~gpio_7322_mod(dd, 0, 0, 0) >> QIB_EEPROM_WEN_NUM; + gpio_7322_mod(dd, wen ? 0 : mask, mask, mask); + + return prev_wen & 1; +} + +/* + * Read fundamental info we need to use the chip. These are + * the registers that describe chip capabilities, and are + * saved in shadow registers. + */ +static void get_7322_chip_params(struct qib_devdata *dd) +{ + u64 val; + u32 piobufs; + int mtu; + + dd->palign = qib_read_kreg32(dd, kr_pagealign); + + dd->uregbase = qib_read_kreg32(dd, kr_userregbase); + + dd->rcvtidcnt = qib_read_kreg32(dd, kr_rcvtidcnt); + dd->rcvtidbase = qib_read_kreg32(dd, kr_rcvtidbase); + dd->rcvegrbase = qib_read_kreg32(dd, kr_rcvegrbase); + dd->piobufbase = qib_read_kreg64(dd, kr_sendpiobufbase); + dd->pio2k_bufbase = dd->piobufbase & 0xffffffff; + + val = qib_read_kreg64(dd, kr_sendpiobufcnt); + dd->piobcnt2k = val & ~0U; + dd->piobcnt4k = val >> 32; + val = qib_read_kreg64(dd, kr_sendpiosize); + dd->piosize2k = val & ~0U; + dd->piosize4k = val >> 32; + + mtu = ib_mtu_enum_to_int(qib_ibmtu); + if (mtu == -1) + mtu = QIB_DEFAULT_MTU; + dd->pport[0].ibmtu = (u32)mtu; + dd->pport[1].ibmtu = (u32)mtu; + + /* these may be adjusted in init_chip_wc_pat() */ + dd->pio2kbase = (u32 __iomem *) + ((char __iomem *) dd->kregbase + dd->pio2k_bufbase); + dd->pio4kbase = (u32 __iomem *) + ((char __iomem *) dd->kregbase + + (dd->piobufbase >> 32)); + /* + * 4K buffers take 2 pages; we use roundup just to be + * paranoid; we calculate it once here, rather than on + * ever buf allocate + */ + dd->align4k = ALIGN(dd->piosize4k, dd->palign); + + piobufs = dd->piobcnt4k + dd->piobcnt2k + NUM_VL15_BUFS; + + dd->pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) / + (sizeof(u64) * BITS_PER_BYTE / 2); +} + +/* + * The chip base addresses in cspec and cpspec have to be set + * after possible init_chip_wc_pat(), rather than in + * get_7322_chip_params(), so split out as separate function + */ +static void qib_7322_set_baseaddrs(struct qib_devdata *dd) +{ + u32 cregbase; + cregbase = qib_read_kreg32(dd, kr_counterregbase); + + dd->cspec->cregbase = (u64 __iomem *)(cregbase + + (char __iomem *)dd->kregbase); + + dd->egrtidbase = (u64 __iomem *) + ((char __iomem *) dd->kregbase + dd->rcvegrbase); + + /* port registers are defined as relative to base of chip */ + dd->pport[0].cpspec->kpregbase = + (u64 __iomem *)((char __iomem *)dd->kregbase); + dd->pport[1].cpspec->kpregbase = + (u64 __iomem *)(dd->palign + + (char __iomem *)dd->kregbase); + dd->pport[0].cpspec->cpregbase = + (u64 __iomem *)(qib_read_kreg_port(&dd->pport[0], + kr_counterregbase) + (char __iomem *)dd->kregbase); + dd->pport[1].cpspec->cpregbase = + (u64 __iomem *)(qib_read_kreg_port(&dd->pport[1], + kr_counterregbase) + (char __iomem *)dd->kregbase); +} + +/* + * This is a fairly special-purpose observer, so we only support + * the port-specific parts of SendCtrl + */ + +#define SENDCTRL_SHADOWED (SYM_MASK(SendCtrl_0, SendEnable) | \ + SYM_MASK(SendCtrl_0, SDmaEnable) | \ + SYM_MASK(SendCtrl_0, SDmaIntEnable) | \ + SYM_MASK(SendCtrl_0, SDmaSingleDescriptor) | \ + SYM_MASK(SendCtrl_0, SDmaHalt) | \ + SYM_MASK(SendCtrl_0, IBVLArbiterEn) | \ + SYM_MASK(SendCtrl_0, ForceCreditUpToDate)) + +static int sendctrl_hook(struct qib_devdata *dd, + const struct diag_observer *op, u32 offs, + u64 *data, u64 mask, int only_32) +{ + unsigned long flags; + unsigned idx; + unsigned pidx; + struct qib_pportdata *ppd = NULL; + u64 local_data, all_bits; + + /* + * The fixed correspondence between Physical ports and pports is + * severed. We need to hunt for the ppd that corresponds + * to the offset we got. And we have to do that without admitting + * we know the stride, apparently. + */ + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + u64 __iomem *psptr; + u32 psoffs; + + ppd = dd->pport + pidx; + if (!ppd->cpspec->kpregbase) + continue; + + psptr = ppd->cpspec->kpregbase + krp_sendctrl; + psoffs = (u32) (psptr - dd->kregbase) * sizeof(*psptr); + if (psoffs == offs) + break; + } + + /* If pport is not being managed by driver, just avoid shadows. */ + if (pidx >= dd->num_pports) + ppd = NULL; + + /* In any case, "idx" is flat index in kreg space */ + idx = offs / sizeof(u64); + + all_bits = ~0ULL; + if (only_32) + all_bits >>= 32; + + spin_lock_irqsave(&dd->sendctrl_lock, flags); + if (!ppd || (mask & all_bits) != all_bits) { + /* + * At least some mask bits are zero, so we need + * to read. The judgement call is whether from + * reg or shadow. First-cut: read reg, and complain + * if any bits which should be shadowed are different + * from their shadowed value. + */ + if (only_32) + local_data = (u64)qib_read_kreg32(dd, idx); + else + local_data = qib_read_kreg64(dd, idx); + *data = (local_data & ~mask) | (*data & mask); + } + if (mask) { + /* + * At least some mask bits are one, so we need + * to write, but only shadow some bits. + */ + u64 sval, tval; /* Shadowed, transient */ + + /* + * New shadow val is bits we don't want to touch, + * ORed with bits we do, that are intended for shadow. + */ + if (ppd) { + sval = ppd->p_sendctrl & ~mask; + sval |= *data & SENDCTRL_SHADOWED & mask; + ppd->p_sendctrl = sval; + } else + sval = *data & SENDCTRL_SHADOWED & mask; + tval = sval | (*data & ~SENDCTRL_SHADOWED & mask); + qib_write_kreg(dd, idx, tval); + qib_write_kreg(dd, kr_scratch, 0Ull); + } + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + return only_32 ? 4 : 8; +} + +static const struct diag_observer sendctrl_0_observer = { + sendctrl_hook, KREG_IDX(SendCtrl_0) * sizeof(u64), + KREG_IDX(SendCtrl_0) * sizeof(u64) +}; + +static const struct diag_observer sendctrl_1_observer = { + sendctrl_hook, KREG_IDX(SendCtrl_1) * sizeof(u64), + KREG_IDX(SendCtrl_1) * sizeof(u64) +}; + +static ushort sdma_fetch_prio = 8; +module_param_named(sdma_fetch_prio, sdma_fetch_prio, ushort, S_IRUGO); +MODULE_PARM_DESC(sdma_fetch_prio, "SDMA descriptor fetch priority"); + +/* Besides logging QSFP events, we set appropriate TxDDS values */ +static void init_txdds_table(struct qib_pportdata *ppd, int override); + +static void qsfp_7322_event(struct work_struct *work) +{ + struct qib_qsfp_data *qd; + struct qib_pportdata *ppd; + unsigned long pwrup; + unsigned long flags; + int ret; + u32 le2; + + qd = container_of(work, struct qib_qsfp_data, work); + ppd = qd->ppd; + pwrup = qd->t_insert + + msecs_to_jiffies(QSFP_PWR_LAG_MSEC - QSFP_MODPRS_LAG_MSEC); + + /* Delay for 20 msecs to allow ModPrs resistor to setup */ + mdelay(QSFP_MODPRS_LAG_MSEC); + + if (!qib_qsfp_mod_present(ppd)) { + ppd->cpspec->qsfp_data.modpresent = 0; + /* Set the physical link to disabled */ + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_LINKV; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } else { + /* + * Some QSFP's not only do not respond until the full power-up + * time, but may behave badly if we try. So hold off responding + * to insertion. + */ + while (1) { + if (time_is_before_jiffies(pwrup)) + break; + msleep(20); + } + + ret = qib_refresh_qsfp_cache(ppd, &qd->cache); + + /* + * Need to change LE2 back to defaults if we couldn't + * read the cable type (to handle cable swaps), so do this + * even on failure to read cable information. We don't + * get here for QME, so IS_QME check not needed here. + */ + if (!ret && !ppd->dd->cspec->r1) { + if (QSFP_IS_ACTIVE_FAR(qd->cache.tech)) + le2 = LE2_QME; + else if (qd->cache.atten[1] >= qib_long_atten && + QSFP_IS_CU(qd->cache.tech)) + le2 = LE2_5m; + else + le2 = LE2_DEFAULT; + } else + le2 = LE2_DEFAULT; + ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7)); + /* + * We always change parameteters, since we can choose + * values for cables without eeproms, and the cable may have + * changed from a cable with full or partial eeprom content + * to one with partial or no content. + */ + init_txdds_table(ppd, 0); + /* The physical link is being re-enabled only when the + * previous state was DISABLED and the VALID bit is not + * set. This should only happen when the cable has been + * physically pulled. */ + if (!ppd->cpspec->qsfp_data.modpresent && + (ppd->lflags & (QIBL_LINKV | QIBL_IB_LINK_DISABLED))) { + ppd->cpspec->qsfp_data.modpresent = 1; + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_SLEEP); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_LINKV; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } + } +} + +/* + * There is little we can do but complain to the user if QSFP + * initialization fails. + */ +static void qib_init_7322_qsfp(struct qib_pportdata *ppd) +{ + unsigned long flags; + struct qib_qsfp_data *qd = &ppd->cpspec->qsfp_data; + struct qib_devdata *dd = ppd->dd; + u64 mod_prs_bit = QSFP_GPIO_MOD_PRS_N; + + mod_prs_bit <<= (QSFP_GPIO_PORT2_SHIFT * ppd->hw_pidx); + qd->ppd = ppd; + qib_qsfp_init(qd, qsfp_7322_event); + spin_lock_irqsave(&dd->cspec->gpio_lock, flags); + dd->cspec->extctrl |= (mod_prs_bit << SYM_LSB(EXTCtrl, GPIOInvert)); + dd->cspec->gpio_mask |= mod_prs_bit; + qib_write_kreg(dd, kr_extctrl, dd->cspec->extctrl); + qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask); + spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); +} + +/* + * called at device initialization time, and also if the txselect + * module parameter is changed. This is used for cables that don't + * have valid QSFP EEPROMs (not present, or attenuation is zero). + * We initialize to the default, then if there is a specific + * unit,port match, we use that (and set it immediately, for the + * current speed, if the link is at INIT or better). + * String format is "default# unit#,port#=# ... u,p=#", separators must + * be a SPACE character. A newline terminates. The u,p=# tuples may + * optionally have "u,p=#,#", where the final # is the H1 value + * The last specific match is used (actually, all are used, but last + * one is the one that winds up set); if none at all, fall back on default. + */ +static void set_no_qsfp_atten(struct qib_devdata *dd, int change) +{ + char *nxt, *str; + u32 pidx, unit, port, deflt, h1; + unsigned long val; + int any = 0, seth1; + int txdds_size; + + str = txselect_list; + + /* default number is validated in setup_txselect() */ + deflt = simple_strtoul(str, &nxt, 0); + for (pidx = 0; pidx < dd->num_pports; ++pidx) + dd->pport[pidx].cpspec->no_eep = deflt; + + txdds_size = TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ; + if (IS_QME(dd) || IS_QMH(dd)) + txdds_size += TXDDS_MFG_SZ; + + while (*nxt && nxt[1]) { + str = ++nxt; + unit = simple_strtoul(str, &nxt, 0); + if (nxt == str || !*nxt || *nxt != ',') { + while (*nxt && *nxt++ != ' ') /* skip to next, if any */ + ; + continue; + } + str = ++nxt; + port = simple_strtoul(str, &nxt, 0); + if (nxt == str || *nxt != '=') { + while (*nxt && *nxt++ != ' ') /* skip to next, if any */ + ; + continue; + } + str = ++nxt; + val = simple_strtoul(str, &nxt, 0); + if (nxt == str) { + while (*nxt && *nxt++ != ' ') /* skip to next, if any */ + ; + continue; + } + if (val >= txdds_size) + continue; + seth1 = 0; + h1 = 0; /* gcc thinks it might be used uninitted */ + if (*nxt == ',' && nxt[1]) { + str = ++nxt; + h1 = (u32)simple_strtoul(str, &nxt, 0); + if (nxt == str) + while (*nxt && *nxt++ != ' ') /* skip */ + ; + else + seth1 = 1; + } + for (pidx = 0; dd->unit == unit && pidx < dd->num_pports; + ++pidx) { + struct qib_pportdata *ppd = &dd->pport[pidx]; + + if (ppd->port != port || !ppd->link_speed_supported) + continue; + ppd->cpspec->no_eep = val; + if (seth1) + ppd->cpspec->h1_val = h1; + /* now change the IBC and serdes, overriding generic */ + init_txdds_table(ppd, 1); + /* Re-enable the physical state machine on mezz boards + * now that the correct settings have been set. + * QSFP boards are handles by the QSFP event handler */ + if (IS_QMH(dd) || IS_QME(dd)) + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_SLEEP); + any++; + } + if (*nxt == '\n') + break; /* done */ + } + if (change && !any) { + /* no specific setting, use the default. + * Change the IBC and serdes, but since it's + * general, don't override specific settings. + */ + for (pidx = 0; pidx < dd->num_pports; ++pidx) + if (dd->pport[pidx].link_speed_supported) + init_txdds_table(&dd->pport[pidx], 0); + } +} + +/* handle the txselect parameter changing */ +static int setup_txselect(const char *str, struct kernel_param *kp) +{ + struct qib_devdata *dd; + unsigned long val; + char *n; + if (strlen(str) >= MAX_ATTEN_LEN) { + printk(KERN_INFO QIB_DRV_NAME " txselect_values string " + "too long\n"); + return -ENOSPC; + } + val = simple_strtoul(str, &n, 0); + if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + + TXDDS_MFG_SZ)) { + printk(KERN_INFO QIB_DRV_NAME + "txselect_values must start with a number < %d\n", + TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ); + return -EINVAL; + } + strcpy(txselect_list, str); + + list_for_each_entry(dd, &qib_dev_list, list) + if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322) + set_no_qsfp_atten(dd, 1); + return 0; +} + +/* + * Write the final few registers that depend on some of the + * init setup. Done late in init, just before bringing up + * the serdes. + */ +static int qib_late_7322_initreg(struct qib_devdata *dd) +{ + int ret = 0, n; + u64 val; + + qib_write_kreg(dd, kr_rcvhdrentsize, dd->rcvhdrentsize); + qib_write_kreg(dd, kr_rcvhdrsize, dd->rcvhdrsize); + qib_write_kreg(dd, kr_rcvhdrcnt, dd->rcvhdrcnt); + qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); + val = qib_read_kreg64(dd, kr_sendpioavailaddr); + if (val != dd->pioavailregs_phys) { + qib_dev_err(dd, "Catastrophic software error, " + "SendPIOAvailAddr written as %lx, " + "read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); + ret = -EINVAL; + } + + n = dd->piobcnt2k + dd->piobcnt4k + NUM_VL15_BUFS; + qib_7322_txchk_change(dd, 0, n, TXCHK_CHG_TYPE_KERN, NULL); + /* driver sends get pkey, lid, etc. checking also, to catch bugs */ + qib_7322_txchk_change(dd, 0, n, TXCHK_CHG_TYPE_ENAB1, NULL); + + qib_register_observer(dd, &sendctrl_0_observer); + qib_register_observer(dd, &sendctrl_1_observer); + + dd->control &= ~QLOGIC_IB_C_SDMAFETCHPRIOEN; + qib_write_kreg(dd, kr_control, dd->control); + /* + * Set SendDmaFetchPriority and init Tx params, including + * QSFP handler on boards that have QSFP. + * First set our default attenuation entry for cables that + * don't have valid attenuation. + */ + set_no_qsfp_atten(dd, 0); + for (n = 0; n < dd->num_pports; ++n) { + struct qib_pportdata *ppd = dd->pport + n; + + qib_write_kreg_port(ppd, krp_senddmaprioritythld, + sdma_fetch_prio & 0xf); + /* Initialize qsfp if present on board. */ + if (dd->flags & QIB_HAS_QSFP) + qib_init_7322_qsfp(ppd); + } + dd->control |= QLOGIC_IB_C_SDMAFETCHPRIOEN; + qib_write_kreg(dd, kr_control, dd->control); + + return ret; +} + +/* per IB port errors. */ +#define SENDCTRL_PIBP (MASK_ACROSS(0, 1) | MASK_ACROSS(3, 3) | \ + MASK_ACROSS(8, 15)) +#define RCVCTRL_PIBP (MASK_ACROSS(0, 17) | MASK_ACROSS(39, 41)) +#define ERRS_PIBP (MASK_ACROSS(57, 58) | MASK_ACROSS(54, 54) | \ + MASK_ACROSS(36, 49) | MASK_ACROSS(29, 34) | MASK_ACROSS(14, 17) | \ + MASK_ACROSS(0, 11)) + +/* + * Write the initialization per-port registers that need to be done at + * driver load and after reset completes (i.e., that aren't done as part + * of other init procedures called from qib_init.c). + * Some of these should be redundant on reset, but play safe. + */ +static void write_7322_init_portregs(struct qib_pportdata *ppd) +{ + u64 val; + int i; + + if (!ppd->link_speed_supported) { + /* no buffer credits for this port */ + for (i = 1; i < 8; i++) + qib_write_kreg_port(ppd, krp_rxcreditvl0 + i, 0); + qib_write_kreg_port(ppd, krp_ibcctrl_b, 0); + qib_write_kreg(ppd->dd, kr_scratch, 0); + return; + } + + /* + * Set the number of supported virtual lanes in IBC, + * for flow control packet handling on unsupported VLs + */ + val = qib_read_kreg_port(ppd, krp_ibsdtestiftx); + val &= ~SYM_MASK(IB_SDTEST_IF_TX_0, VL_CAP); + val |= (u64)(ppd->vls_supported - 1) << + SYM_LSB(IB_SDTEST_IF_TX_0, VL_CAP); + qib_write_kreg_port(ppd, krp_ibsdtestiftx, val); + + qib_write_kreg_port(ppd, krp_rcvbthqp, QIB_KD_QP); + + /* enable tx header checking */ + qib_write_kreg_port(ppd, krp_sendcheckcontrol, IBA7322_SENDCHK_PKEY | + IBA7322_SENDCHK_BTHQP | IBA7322_SENDCHK_SLID | + IBA7322_SENDCHK_RAW_IPV6 | IBA7322_SENDCHK_MINSZ); + + qib_write_kreg_port(ppd, krp_ncmodectrl, + SYM_MASK(IBNCModeCtrl_0, ScrambleCapLocal)); + + /* + * Unconditionally clear the bufmask bits. If SDMA is + * enabled, we'll set them appropriately later. + */ + qib_write_kreg_port(ppd, krp_senddmabufmask0, 0); + qib_write_kreg_port(ppd, krp_senddmabufmask1, 0); + qib_write_kreg_port(ppd, krp_senddmabufmask2, 0); + if (ppd->dd->cspec->r1) + ppd->p_sendctrl |= SYM_MASK(SendCtrl_0, ForceCreditUpToDate); +} + +/* + * Write the initialization per-device registers that need to be done at + * driver load and after reset completes (i.e., that aren't done as part + * of other init procedures called from qib_init.c). Also write per-port + * registers that are affected by overall device config, such as QP mapping + * Some of these should be redundant on reset, but play safe. + */ +static void write_7322_initregs(struct qib_devdata *dd) +{ + struct qib_pportdata *ppd; + int i, pidx; + u64 val; + + /* Set Multicast QPs received by port 2 to map to context one. */ + qib_write_kreg(dd, KREG_IDX(RcvQPMulticastContext_1), 1); + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + unsigned n, regno; + unsigned long flags; + + if (dd->n_krcv_queues < 2 || + !dd->pport[pidx].link_speed_supported) + continue; + + ppd = &dd->pport[pidx]; + + /* be paranoid against later code motion, etc. */ + spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags); + ppd->p_rcvctrl |= SYM_MASK(RcvCtrl_0, RcvQPMapEnable); + spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags); + + /* Initialize QP to context mapping */ + regno = krp_rcvqpmaptable; + val = 0; + if (dd->num_pports > 1) + n = dd->first_user_ctxt / dd->num_pports; + else + n = dd->first_user_ctxt - 1; + for (i = 0; i < 32; ) { + unsigned ctxt; + + if (dd->num_pports > 1) + ctxt = (i % n) * dd->num_pports + pidx; + else if (i % n) + ctxt = (i % n) + 1; + else + ctxt = ppd->hw_pidx; + val |= ctxt << (5 * (i % 6)); + i++; + if (i % 6 == 0) { + qib_write_kreg_port(ppd, regno, val); + val = 0; + regno++; + } + } + qib_write_kreg_port(ppd, regno, val); + } + + /* + * Setup up interrupt mitigation for kernel contexts, but + * not user contexts (user contexts use interrupts when + * stalled waiting for any packet, so want those interrupts + * right away). + */ + for (i = 0; i < dd->first_user_ctxt; i++) { + dd->cspec->rcvavail_timeout[i] = rcv_int_timeout; + qib_write_kreg(dd, kr_rcvavailtimeout + i, rcv_int_timeout); + } + + /* + * Initialize as (disabled) rcvflow tables. Application code + * will setup each flow as it uses the flow. + * Doesn't clear any of the error bits that might be set. + */ + val = TIDFLOW_ERRBITS; /* these are W1C */ + for (i = 0; i < dd->cfgctxts; i++) { + int flow; + for (flow = 0; flow < NUM_TIDFLOWS_CTXT; flow++) + qib_write_ureg(dd, ur_rcvflowtable+flow, val, i); + } + + /* + * dual cards init to dual port recovery, single port cards to + * the one port. Dual port cards may later adjust to 1 port, + * and then back to dual port if both ports are connected + * */ + if (dd->num_pports) + setup_7322_link_recovery(dd->pport, dd->num_pports > 1); +} + +static int qib_init_7322_variables(struct qib_devdata *dd) +{ + struct qib_pportdata *ppd; + unsigned features, pidx, sbufcnt; + int ret, mtu; + u32 sbufs, updthresh; + + /* pport structs are contiguous, allocated after devdata */ + ppd = (struct qib_pportdata *)(dd + 1); + dd->pport = ppd; + ppd[0].dd = dd; + ppd[1].dd = dd; + + dd->cspec = (struct qib_chip_specific *)(ppd + 2); + + ppd[0].cpspec = (struct qib_chippport_specific *)(dd->cspec + 1); + ppd[1].cpspec = &ppd[0].cpspec[1]; + ppd[0].cpspec->ppd = &ppd[0]; /* for autoneg_7322_work() */ + ppd[1].cpspec->ppd = &ppd[1]; /* for autoneg_7322_work() */ + + spin_lock_init(&dd->cspec->rcvmod_lock); + spin_lock_init(&dd->cspec->gpio_lock); + + /* we haven't yet set QIB_PRESENT, so use read directly */ + dd->revision = readq(&dd->kregbase[kr_revision]); + + if ((dd->revision & 0xffffffffU) == 0xffffffffU) { + qib_dev_err(dd, "Revision register read failure, " + "giving up initialization\n"); + ret = -ENODEV; + goto bail; + } + dd->flags |= QIB_PRESENT; /* now register routines work */ + + dd->majrev = (u8) SYM_FIELD(dd->revision, Revision_R, ChipRevMajor); + dd->minrev = (u8) SYM_FIELD(dd->revision, Revision_R, ChipRevMinor); + dd->cspec->r1 = dd->minrev == 1; + + get_7322_chip_params(dd); + features = qib_7322_boardname(dd); + + /* now that piobcnt2k and 4k set, we can allocate these */ + sbufcnt = dd->piobcnt2k + dd->piobcnt4k + + NUM_VL15_BUFS + BITS_PER_LONG - 1; + sbufcnt /= BITS_PER_LONG; + dd->cspec->sendchkenable = kmalloc(sbufcnt * + sizeof(*dd->cspec->sendchkenable), GFP_KERNEL); + dd->cspec->sendgrhchk = kmalloc(sbufcnt * + sizeof(*dd->cspec->sendgrhchk), GFP_KERNEL); + dd->cspec->sendibchk = kmalloc(sbufcnt * + sizeof(*dd->cspec->sendibchk), GFP_KERNEL); + if (!dd->cspec->sendchkenable || !dd->cspec->sendgrhchk || + !dd->cspec->sendibchk) { + qib_dev_err(dd, "Failed allocation for hdrchk bitmaps\n"); + ret = -ENOMEM; + goto bail; + } + + ppd = dd->pport; + + /* + * GPIO bits for TWSI data and clock, + * used for serial EEPROM. + */ + dd->gpio_sda_num = _QIB_GPIO_SDA_NUM; + dd->gpio_scl_num = _QIB_GPIO_SCL_NUM; + dd->twsi_eeprom_dev = QIB_TWSI_EEPROM_DEV; + + dd->flags |= QIB_HAS_INTX | QIB_HAS_LINK_LATENCY | + QIB_NODMA_RTAIL | QIB_HAS_VLSUPP | QIB_HAS_HDRSUPP | + QIB_HAS_THRESH_UPDATE | + (sdma_idle_cnt ? QIB_HAS_SDMA_TIMEOUT : 0); + dd->flags |= qib_special_trigger ? + QIB_USE_SPCL_TRIG : QIB_HAS_SEND_DMA; + + /* + * Setup initial values. These may change when PAT is enabled, but + * we need these to do initial chip register accesses. + */ + qib_7322_set_baseaddrs(dd); + + mtu = ib_mtu_enum_to_int(qib_ibmtu); + if (mtu == -1) + mtu = QIB_DEFAULT_MTU; + + dd->cspec->int_enable_mask = QIB_I_BITSEXTANT; + /* all hwerrors become interrupts, unless special purposed */ + dd->cspec->hwerrmask = ~0ULL; + /* link_recovery setup causes these errors, so ignore them, + * other than clearing them when they occur */ + dd->cspec->hwerrmask &= + ~(SYM_MASK(HwErrMask, IBSerdesPClkNotDetectMask_0) | + SYM_MASK(HwErrMask, IBSerdesPClkNotDetectMask_1) | + HWE_MASK(LATriggered)); + + for (pidx = 0; pidx < NUM_IB_PORTS; ++pidx) { + struct qib_chippport_specific *cp = ppd->cpspec; + ppd->link_speed_supported = features & PORT_SPD_CAP; + features >>= PORT_SPD_CAP_SHIFT; + if (!ppd->link_speed_supported) { + /* single port mode (7340, or configured) */ + dd->skip_kctxt_mask |= 1 << pidx; + if (pidx == 0) { + /* Make sure port is disabled. */ + qib_write_kreg_port(ppd, krp_rcvctrl, 0); + qib_write_kreg_port(ppd, krp_ibcctrl_a, 0); + ppd[0] = ppd[1]; + dd->cspec->hwerrmask &= ~(SYM_MASK(HwErrMask, + IBSerdesPClkNotDetectMask_0) + | SYM_MASK(HwErrMask, + SDmaMemReadErrMask_0)); + dd->cspec->int_enable_mask &= ~( + SYM_MASK(IntMask, SDmaCleanupDoneMask_0) | + SYM_MASK(IntMask, SDmaIdleIntMask_0) | + SYM_MASK(IntMask, SDmaProgressIntMask_0) | + SYM_MASK(IntMask, SDmaIntMask_0) | + SYM_MASK(IntMask, ErrIntMask_0) | + SYM_MASK(IntMask, SendDoneIntMask_0)); + } else { + /* Make sure port is disabled. */ + qib_write_kreg_port(ppd, krp_rcvctrl, 0); + qib_write_kreg_port(ppd, krp_ibcctrl_a, 0); + dd->cspec->hwerrmask &= ~(SYM_MASK(HwErrMask, + IBSerdesPClkNotDetectMask_1) + | SYM_MASK(HwErrMask, + SDmaMemReadErrMask_1)); + dd->cspec->int_enable_mask &= ~( + SYM_MASK(IntMask, SDmaCleanupDoneMask_1) | + SYM_MASK(IntMask, SDmaIdleIntMask_1) | + SYM_MASK(IntMask, SDmaProgressIntMask_1) | + SYM_MASK(IntMask, SDmaIntMask_1) | + SYM_MASK(IntMask, ErrIntMask_1) | + SYM_MASK(IntMask, SendDoneIntMask_1)); + } + continue; + } + + dd->num_pports++; + qib_init_pportdata(ppd, dd, pidx, dd->num_pports); + + ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; + ppd->link_width_enabled = IB_WIDTH_4X; + ppd->link_speed_enabled = ppd->link_speed_supported; + /* + * Set the initial values to reasonable default, will be set + * for real when link is up. + */ + ppd->link_width_active = IB_WIDTH_4X; + ppd->link_speed_active = QIB_IB_SDR; + ppd->delay_mult = ib_rate_to_delay[IB_RATE_10_GBPS]; + switch (qib_num_cfg_vls) { + case 1: + ppd->vls_supported = IB_VL_VL0; + break; + case 2: + ppd->vls_supported = IB_VL_VL0_1; + break; + default: + qib_devinfo(dd->pcidev, + "Invalid num_vls %u, using 4 VLs\n", + qib_num_cfg_vls); + qib_num_cfg_vls = 4; + /* fall through */ + case 4: + ppd->vls_supported = IB_VL_VL0_3; + break; + case 8: + if (mtu <= 2048) + ppd->vls_supported = IB_VL_VL0_7; + else { + qib_devinfo(dd->pcidev, + "Invalid num_vls %u for MTU %d " + ", using 4 VLs\n", + qib_num_cfg_vls, mtu); + ppd->vls_supported = IB_VL_VL0_3; + qib_num_cfg_vls = 4; + } + break; + } + ppd->vls_operational = ppd->vls_supported; + + init_waitqueue_head(&cp->autoneg_wait); + INIT_DELAYED_WORK(&cp->autoneg_work, + autoneg_7322_work); + if (ppd->dd->cspec->r1) + INIT_DELAYED_WORK(&cp->ipg_work, ipg_7322_work); + + /* + * For Mez and similar cards, no qsfp info, so do + * the "cable info" setup here. Can be overridden + * in adapter-specific routines. + */ + if (!(dd->flags & QIB_HAS_QSFP)) { + if (!IS_QMH(dd) && !IS_QME(dd)) + qib_devinfo(dd->pcidev, "IB%u:%u: " + "Unknown mezzanine card type\n", + dd->unit, ppd->port); + cp->h1_val = IS_QMH(dd) ? H1_FORCE_QMH : H1_FORCE_QME; + /* + * Choose center value as default tx serdes setting + * until changed through module parameter. + */ + ppd->cpspec->no_eep = IS_QMH(dd) ? + TXDDS_TABLE_SZ + 2 : TXDDS_TABLE_SZ + 4; + } else + cp->h1_val = H1_FORCE_VAL; + + /* Avoid writes to chip for mini_init */ + if (!qib_mini_init) + write_7322_init_portregs(ppd); + + init_timer(&cp->chase_timer); + cp->chase_timer.function = reenable_chase; + cp->chase_timer.data = (unsigned long)ppd; + + ppd++; + } + + dd->rcvhdrentsize = qib_rcvhdrentsize ? + qib_rcvhdrentsize : QIB_RCVHDR_ENTSIZE; + dd->rcvhdrsize = qib_rcvhdrsize ? + qib_rcvhdrsize : QIB_DFLT_RCVHDRSIZE; + dd->rhf_offset = dd->rcvhdrentsize - sizeof(u64) / sizeof(u32); + + /* we always allocate at least 2048 bytes for eager buffers */ + dd->rcvegrbufsize = max(mtu, 2048); + BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); + dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); + + qib_7322_tidtemplate(dd); + + /* + * We can request a receive interrupt for 1 or + * more packets from current offset. + */ + dd->rhdrhead_intr_off = + (u64) rcv_int_count << IBA7322_HDRHEAD_PKTINT_SHIFT; + + /* setup the stats timer; the add_timer is done at end of init */ + init_timer(&dd->stats_timer); + dd->stats_timer.function = qib_get_7322_faststats; + dd->stats_timer.data = (unsigned long) dd; + + dd->ureg_align = 0x10000; /* 64KB alignment */ + + dd->piosize2kmax_dwords = dd->piosize2k >> 2; + + qib_7322_config_ctxts(dd); + qib_set_ctxtcnt(dd); + + if (qib_wc_pat) { + resource_size_t vl15off; + /* + * We do not set WC on the VL15 buffers to avoid + * a rare problem with unaligned writes from + * interrupt-flushed store buffers, so we need + * to map those separately here. We can't solve + * this for the rarely used mtrr case. + */ + ret = init_chip_wc_pat(dd, 0); + if (ret) + goto bail; + + /* vl15 buffers start just after the 4k buffers */ + vl15off = dd->physaddr + (dd->piobufbase >> 32) + + dd->piobcnt4k * dd->align4k; + dd->piovl15base = ioremap_nocache(vl15off, + NUM_VL15_BUFS * dd->align4k); + if (!dd->piovl15base) + goto bail; + } + qib_7322_set_baseaddrs(dd); /* set chip access pointers now */ + + ret = 0; + if (qib_mini_init) + goto bail; + if (!dd->num_pports) { + qib_dev_err(dd, "No ports enabled, giving up initialization\n"); + goto bail; /* no error, so can still figure out why err */ + } + + write_7322_initregs(dd); + ret = qib_create_ctxts(dd); + init_7322_cntrnames(dd); + + updthresh = 8U; /* update threshold */ + + /* use all of 4KB buffers for the kernel SDMA, zero if !SDMA. + * reserve the update threshold amount for other kernel use, such + * as sending SMI, MAD, and ACKs, or 3, whichever is greater, + * unless we aren't enabling SDMA, in which case we want to use + * all the 4k bufs for the kernel. + * if this was less than the update threshold, we could wait + * a long time for an update. Coded this way because we + * sometimes change the update threshold for various reasons, + * and we want this to remain robust. + */ + if (dd->flags & QIB_HAS_SEND_DMA) { + dd->cspec->sdmabufcnt = dd->piobcnt4k; + sbufs = updthresh > 3 ? updthresh : 3; + } else { + dd->cspec->sdmabufcnt = 0; + sbufs = dd->piobcnt4k; + } + dd->cspec->lastbuf_for_pio = dd->piobcnt2k + dd->piobcnt4k - + dd->cspec->sdmabufcnt; + dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs; + dd->cspec->lastbuf_for_pio--; /* range is <= , not < */ + dd->pbufsctxt = (dd->cfgctxts > dd->first_user_ctxt) ? + dd->lastctxt_piobuf / (dd->cfgctxts - dd->first_user_ctxt) : 0; + + /* + * If we have 16 user contexts, we will have 7 sbufs + * per context, so reduce the update threshold to match. We + * want to update before we actually run out, at low pbufs/ctxt + * so give ourselves some margin. + */ + if (dd->pbufsctxt >= 2 && dd->pbufsctxt - 2 < updthresh) + updthresh = dd->pbufsctxt - 2; + dd->cspec->updthresh_dflt = updthresh; + dd->cspec->updthresh = updthresh; + + /* before full enable, no interrupts, no locking needed */ + dd->sendctrl |= ((updthresh & SYM_RMASK(SendCtrl, AvailUpdThld)) + << SYM_LSB(SendCtrl, AvailUpdThld)) | + SYM_MASK(SendCtrl, SendBufAvailPad64Byte); + + dd->psxmitwait_supported = 1; + dd->psxmitwait_check_rate = QIB_7322_PSXMITWAIT_CHECK_RATE; +bail: + if (!dd->ctxtcnt) + dd->ctxtcnt = 1; /* for other initialization code */ + + return ret; +} + +static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *ppd, u64 pbc, + u32 *pbufnum) +{ + u32 first, last, plen = pbc & QIB_PBC_LENGTH_MASK; + struct qib_devdata *dd = ppd->dd; + + /* last is same for 2k and 4k, because we use 4k if all 2k busy */ + if (pbc & PBC_7322_VL15_SEND) { + first = dd->piobcnt2k + dd->piobcnt4k + ppd->hw_pidx; + last = first; + } else { + if ((plen + 1) > dd->piosize2kmax_dwords) + first = dd->piobcnt2k; + else + first = 0; + last = dd->cspec->lastbuf_for_pio; + } + return qib_getsendbuf_range(dd, pbufnum, first, last); +} + +static void qib_set_cntr_7322_sample(struct qib_pportdata *ppd, u32 intv, + u32 start) +{ + qib_write_kreg_port(ppd, krp_psinterval, intv); + qib_write_kreg_port(ppd, krp_psstart, start); +} + +/* + * Must be called with sdma_lock held, or before init finished. + */ +static void qib_sdma_set_7322_desc_cnt(struct qib_pportdata *ppd, unsigned cnt) +{ + qib_write_kreg_port(ppd, krp_senddmadesccnt, cnt); +} + +static struct sdma_set_state_action sdma_7322_action_table[] = { + [qib_sdma_state_s00_hw_down] = { + .go_s99_running_tofalse = 1, + .op_enable = 0, + .op_intenable = 0, + .op_halt = 0, + .op_drain = 0, + }, + [qib_sdma_state_s10_hw_start_up_wait] = { + .op_enable = 0, + .op_intenable = 1, + .op_halt = 1, + .op_drain = 0, + }, + [qib_sdma_state_s20_idle] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 1, + .op_drain = 0, + }, + [qib_sdma_state_s30_sw_clean_up_wait] = { + .op_enable = 0, + .op_intenable = 1, + .op_halt = 1, + .op_drain = 0, + }, + [qib_sdma_state_s40_hw_clean_up_wait] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 1, + .op_drain = 0, + }, + [qib_sdma_state_s50_hw_halt_wait] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 1, + .op_drain = 1, + }, + [qib_sdma_state_s99_running] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 0, + .op_drain = 0, + .go_s99_running_totrue = 1, + }, +}; + +static void qib_7322_sdma_init_early(struct qib_pportdata *ppd) +{ + ppd->sdma_state.set_state_action = sdma_7322_action_table; +} + +static int init_sdma_7322_regs(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + unsigned lastbuf, erstbuf; + u64 senddmabufmask[3] = { 0 }; + int n, ret = 0; + + qib_write_kreg_port(ppd, krp_senddmabase, ppd->sdma_descq_phys); + qib_sdma_7322_setlengen(ppd); + qib_sdma_update_7322_tail(ppd, 0); /* Set SendDmaTail */ + qib_write_kreg_port(ppd, krp_senddmareloadcnt, sdma_idle_cnt); + qib_write_kreg_port(ppd, krp_senddmadesccnt, 0); + qib_write_kreg_port(ppd, krp_senddmaheadaddr, ppd->sdma_head_phys); + + if (dd->num_pports) + n = dd->cspec->sdmabufcnt / dd->num_pports; /* no remainder */ + else + n = dd->cspec->sdmabufcnt; /* failsafe for init */ + erstbuf = (dd->piobcnt2k + dd->piobcnt4k) - + ((dd->num_pports == 1 || ppd->port == 2) ? n : + dd->cspec->sdmabufcnt); + lastbuf = erstbuf + n; + + ppd->sdma_state.first_sendbuf = erstbuf; + ppd->sdma_state.last_sendbuf = lastbuf; + for (; erstbuf < lastbuf; ++erstbuf) { + unsigned word = erstbuf / BITS_PER_LONG; + unsigned bit = erstbuf & (BITS_PER_LONG - 1); + + BUG_ON(word >= 3); + senddmabufmask[word] |= 1ULL << bit; + } + qib_write_kreg_port(ppd, krp_senddmabufmask0, senddmabufmask[0]); + qib_write_kreg_port(ppd, krp_senddmabufmask1, senddmabufmask[1]); + qib_write_kreg_port(ppd, krp_senddmabufmask2, senddmabufmask[2]); + return ret; +} + +/* sdma_lock must be held */ +static u16 qib_sdma_7322_gethead(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + int sane; + int use_dmahead; + u16 swhead; + u16 swtail; + u16 cnt; + u16 hwhead; + + use_dmahead = __qib_sdma_running(ppd) && + (dd->flags & QIB_HAS_SDMA_TIMEOUT); +retry: + hwhead = use_dmahead ? + (u16) le64_to_cpu(*ppd->sdma_head_dma) : + (u16) qib_read_kreg_port(ppd, krp_senddmahead); + + swhead = ppd->sdma_descq_head; + swtail = ppd->sdma_descq_tail; + cnt = ppd->sdma_descq_cnt; + + if (swhead < swtail) + /* not wrapped */ + sane = (hwhead >= swhead) & (hwhead <= swtail); + else if (swhead > swtail) + /* wrapped around */ + sane = ((hwhead >= swhead) && (hwhead < cnt)) || + (hwhead <= swtail); + else + /* empty */ + sane = (hwhead == swhead); + + if (unlikely(!sane)) { + if (use_dmahead) { + /* try one more time, directly from the register */ + use_dmahead = 0; + goto retry; + } + /* proceed as if no progress */ + hwhead = swhead; + } + + return hwhead; +} + +static int qib_sdma_7322_busy(struct qib_pportdata *ppd) +{ + u64 hwstatus = qib_read_kreg_port(ppd, krp_senddmastatus); + + return (hwstatus & SYM_MASK(SendDmaStatus_0, ScoreBoardDrainInProg)) || + (hwstatus & SYM_MASK(SendDmaStatus_0, HaltInProg)) || + !(hwstatus & SYM_MASK(SendDmaStatus_0, InternalSDmaHalt)) || + !(hwstatus & SYM_MASK(SendDmaStatus_0, ScbEmpty)); +} + +/* + * Compute the amount of delay before sending the next packet if the + * port's send rate differs from the static rate set for the QP. + * The delay affects the next packet and the amount of the delay is + * based on the length of the this packet. + */ +static u32 qib_7322_setpbc_control(struct qib_pportdata *ppd, u32 plen, + u8 srate, u8 vl) +{ + u8 snd_mult = ppd->delay_mult; + u8 rcv_mult = ib_rate_to_delay[srate]; + u32 ret; + + ret = rcv_mult > snd_mult ? ((plen + 1) >> 1) * snd_mult : 0; + + /* Indicate VL15, else set the VL in the control word */ + if (vl == 15) + ret |= PBC_7322_VL15_SEND_CTRL; + else + ret |= vl << PBC_VL_NUM_LSB; + ret |= ((u32)(ppd->hw_pidx)) << PBC_PORT_SEL_LSB; + + return ret; +} + +/* + * Enable the per-port VL15 send buffers for use. + * They follow the rest of the buffers, without a config parameter. + * This was in initregs, but that is done before the shadow + * is set up, and this has to be done after the shadow is + * set up. + */ +static void qib_7322_initvl15_bufs(struct qib_devdata *dd) +{ + unsigned vl15bufs; + + vl15bufs = dd->piobcnt2k + dd->piobcnt4k; + qib_chg_pioavailkernel(dd, vl15bufs, NUM_VL15_BUFS, + TXCHK_CHG_TYPE_KERN, NULL); +} + +static void qib_7322_init_ctxt(struct qib_ctxtdata *rcd) +{ + if (rcd->ctxt < NUM_IB_PORTS) { + if (rcd->dd->num_pports > 1) { + rcd->rcvegrcnt = KCTXT0_EGRCNT / 2; + rcd->rcvegr_tid_base = rcd->ctxt ? rcd->rcvegrcnt : 0; + } else { + rcd->rcvegrcnt = KCTXT0_EGRCNT; + rcd->rcvegr_tid_base = 0; + } + } else { + rcd->rcvegrcnt = rcd->dd->cspec->rcvegrcnt; + rcd->rcvegr_tid_base = KCTXT0_EGRCNT + + (rcd->ctxt - NUM_IB_PORTS) * rcd->rcvegrcnt; + } +} + +#define QTXSLEEPS 5000 +static void qib_7322_txchk_change(struct qib_devdata *dd, u32 start, + u32 len, u32 which, struct qib_ctxtdata *rcd) +{ + int i; + const int last = start + len - 1; + const int lastr = last / BITS_PER_LONG; + u32 sleeps = 0; + int wait = rcd != NULL; + unsigned long flags; + + while (wait) { + unsigned long shadow; + int cstart, previ = -1; + + /* + * when flipping from kernel to user, we can't change + * the checking type if the buffer is allocated to the + * driver. It's OK the other direction, because it's + * from close, and we have just disarm'ed all the + * buffers. All the kernel to kernel changes are also + * OK. + */ + for (cstart = start; cstart <= last; cstart++) { + i = ((2 * cstart) + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT) + / BITS_PER_LONG; + if (i != previ) { + shadow = (unsigned long) + le64_to_cpu(dd->pioavailregs_dma[i]); + previ = i; + } + if (test_bit(((2 * cstart) + + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT) + % BITS_PER_LONG, &shadow)) + break; + } + + if (cstart > last) + break; + + if (sleeps == QTXSLEEPS) + break; + /* make sure we see an updated copy next time around */ + sendctrl_7322_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP); + sleeps++; + msleep(20); + } + + switch (which) { + case TXCHK_CHG_TYPE_DIS1: + /* + * disable checking on a range; used by diags; just + * one buffer, but still written generically + */ + for (i = start; i <= last; i++) + clear_bit(i, dd->cspec->sendchkenable); + break; + + case TXCHK_CHG_TYPE_ENAB1: + /* + * (re)enable checking on a range; used by diags; just + * one buffer, but still written generically; read + * scratch to be sure buffer actually triggered, not + * just flushed from processor. + */ + qib_read_kreg32(dd, kr_scratch); + for (i = start; i <= last; i++) + set_bit(i, dd->cspec->sendchkenable); + break; + + case TXCHK_CHG_TYPE_KERN: + /* usable by kernel */ + for (i = start; i <= last; i++) { + set_bit(i, dd->cspec->sendibchk); + clear_bit(i, dd->cspec->sendgrhchk); + } + spin_lock_irqsave(&dd->uctxt_lock, flags); + /* see if we need to raise avail update threshold */ + for (i = dd->first_user_ctxt; + dd->cspec->updthresh != dd->cspec->updthresh_dflt + && i < dd->cfgctxts; i++) + if (dd->rcd[i] && dd->rcd[i]->subctxt_cnt && + ((dd->rcd[i]->piocnt / dd->rcd[i]->subctxt_cnt) - 1) + < dd->cspec->updthresh_dflt) + break; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + if (i == dd->cfgctxts) { + spin_lock_irqsave(&dd->sendctrl_lock, flags); + dd->cspec->updthresh = dd->cspec->updthresh_dflt; + dd->sendctrl &= ~SYM_MASK(SendCtrl, AvailUpdThld); + dd->sendctrl |= (dd->cspec->updthresh & + SYM_RMASK(SendCtrl, AvailUpdThld)) << + SYM_LSB(SendCtrl, AvailUpdThld); + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + sendctrl_7322_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP); + } + break; + + case TXCHK_CHG_TYPE_USER: + /* for user process */ + for (i = start; i <= last; i++) { + clear_bit(i, dd->cspec->sendibchk); + set_bit(i, dd->cspec->sendgrhchk); + } + spin_lock_irqsave(&dd->sendctrl_lock, flags); + if (rcd && rcd->subctxt_cnt && ((rcd->piocnt + / rcd->subctxt_cnt) - 1) < dd->cspec->updthresh) { + dd->cspec->updthresh = (rcd->piocnt / + rcd->subctxt_cnt) - 1; + dd->sendctrl &= ~SYM_MASK(SendCtrl, AvailUpdThld); + dd->sendctrl |= (dd->cspec->updthresh & + SYM_RMASK(SendCtrl, AvailUpdThld)) + << SYM_LSB(SendCtrl, AvailUpdThld); + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + sendctrl_7322_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP); + } else + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + break; + + default: + break; + } + + for (i = start / BITS_PER_LONG; which >= 2 && i <= lastr; ++i) + qib_write_kreg(dd, kr_sendcheckmask + i, + dd->cspec->sendchkenable[i]); + + for (i = start / BITS_PER_LONG; which < 2 && i <= lastr; ++i) { + qib_write_kreg(dd, kr_sendgrhcheckmask + i, + dd->cspec->sendgrhchk[i]); + qib_write_kreg(dd, kr_sendibpktmask + i, + dd->cspec->sendibchk[i]); + } + + /* + * Be sure whatever we did was seen by the chip and acted upon, + * before we return. Mostly important for which >= 2. + */ + qib_read_kreg32(dd, kr_scratch); +} + + +/* useful for trigger analyzers, etc. */ +static void writescratch(struct qib_devdata *dd, u32 val) +{ + qib_write_kreg(dd, kr_scratch, val); +} + +/* Dummy for now, use chip regs soon */ +static int qib_7322_tempsense_rd(struct qib_devdata *dd, int regnum) +{ + return -ENXIO; +} + +/** + * qib_init_iba7322_funcs - set up the chip-specific function pointers + * @dev: the pci_dev for qlogic_ib device + * @ent: pci_device_id struct for this dev + * + * Also allocates, inits, and returns the devdata struct for this + * device instance + * + * This is global, and is called directly at init to set up the + * chip-specific function pointers for later use. + */ +struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct qib_devdata *dd; + int ret, i; + u32 tabsize, actual_cnt = 0; + + dd = qib_alloc_devdata(pdev, + NUM_IB_PORTS * sizeof(struct qib_pportdata) + + sizeof(struct qib_chip_specific) + + NUM_IB_PORTS * sizeof(struct qib_chippport_specific)); + if (IS_ERR(dd)) + goto bail; + + dd->f_bringup_serdes = qib_7322_bringup_serdes; + dd->f_cleanup = qib_setup_7322_cleanup; + dd->f_clear_tids = qib_7322_clear_tids; + dd->f_free_irq = qib_7322_free_irq; + dd->f_get_base_info = qib_7322_get_base_info; + dd->f_get_msgheader = qib_7322_get_msgheader; + dd->f_getsendbuf = qib_7322_getsendbuf; + dd->f_gpio_mod = gpio_7322_mod; + dd->f_eeprom_wen = qib_7322_eeprom_wen; + dd->f_hdrqempty = qib_7322_hdrqempty; + dd->f_ib_updown = qib_7322_ib_updown; + dd->f_init_ctxt = qib_7322_init_ctxt; + dd->f_initvl15_bufs = qib_7322_initvl15_bufs; + dd->f_intr_fallback = qib_7322_intr_fallback; + dd->f_late_initreg = qib_late_7322_initreg; + dd->f_setpbc_control = qib_7322_setpbc_control; + dd->f_portcntr = qib_portcntr_7322; + dd->f_put_tid = qib_7322_put_tid; + dd->f_quiet_serdes = qib_7322_mini_quiet_serdes; + dd->f_rcvctrl = rcvctrl_7322_mod; + dd->f_read_cntrs = qib_read_7322cntrs; + dd->f_read_portcntrs = qib_read_7322portcntrs; + dd->f_reset = qib_do_7322_reset; + dd->f_init_sdma_regs = init_sdma_7322_regs; + dd->f_sdma_busy = qib_sdma_7322_busy; + dd->f_sdma_gethead = qib_sdma_7322_gethead; + dd->f_sdma_sendctrl = qib_7322_sdma_sendctrl; + dd->f_sdma_set_desc_cnt = qib_sdma_set_7322_desc_cnt; + dd->f_sdma_update_tail = qib_sdma_update_7322_tail; + dd->f_sendctrl = sendctrl_7322_mod; + dd->f_set_armlaunch = qib_set_7322_armlaunch; + dd->f_set_cntr_sample = qib_set_cntr_7322_sample; + dd->f_iblink_state = qib_7322_iblink_state; + dd->f_ibphys_portstate = qib_7322_phys_portstate; + dd->f_get_ib_cfg = qib_7322_get_ib_cfg; + dd->f_set_ib_cfg = qib_7322_set_ib_cfg; + dd->f_set_ib_loopback = qib_7322_set_loopback; + dd->f_get_ib_table = qib_7322_get_ib_table; + dd->f_set_ib_table = qib_7322_set_ib_table; + dd->f_set_intr_state = qib_7322_set_intr_state; + dd->f_setextled = qib_setup_7322_setextled; + dd->f_txchk_change = qib_7322_txchk_change; + dd->f_update_usrhead = qib_update_7322_usrhead; + dd->f_wantpiobuf_intr = qib_wantpiobuf_7322_intr; + dd->f_xgxs_reset = qib_7322_mini_pcs_reset; + dd->f_sdma_hw_clean_up = qib_7322_sdma_hw_clean_up; + dd->f_sdma_hw_start_up = qib_7322_sdma_hw_start_up; + dd->f_sdma_init_early = qib_7322_sdma_init_early; + dd->f_writescratch = writescratch; + dd->f_tempsense_rd = qib_7322_tempsense_rd; + /* + * Do remaining PCIe setup and save PCIe values in dd. + * Any error printing is already done by the init code. + * On return, we have the chip mapped, but chip registers + * are not set up until start of qib_init_7322_variables. + */ + ret = qib_pcie_ddinit(dd, pdev, ent); + if (ret < 0) + goto bail_free; + + /* initialize chip-specific variables */ + ret = qib_init_7322_variables(dd); + if (ret) + goto bail_cleanup; + + if (qib_mini_init || !dd->num_pports) + goto bail; + + /* + * Determine number of vectors we want; depends on port count + * and number of configured kernel receive queues actually used. + * Should also depend on whether sdma is enabled or not, but + * that's such a rare testing case it's not worth worrying about. + */ + tabsize = dd->first_user_ctxt + ARRAY_SIZE(irq_table); + for (i = 0; i < tabsize; i++) + if ((i < ARRAY_SIZE(irq_table) && + irq_table[i].port <= dd->num_pports) || + (i >= ARRAY_SIZE(irq_table) && + dd->rcd[i - ARRAY_SIZE(irq_table)])) + actual_cnt++; + /* reduce by ctxt's < 2 */ + if (qib_krcvq01_no_msi) + actual_cnt -= dd->num_pports; + + tabsize = actual_cnt; + dd->cspec->msix_entries = kmalloc(tabsize * + sizeof(struct qib_msix_entry), GFP_KERNEL); + if (!dd->cspec->msix_entries) { + qib_dev_err(dd, "No memory for MSIx table\n"); + tabsize = 0; + } + for (i = 0; i < tabsize; i++) + dd->cspec->msix_entries[i].msix.entry = i; + + if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries)) + qib_dev_err(dd, "Failed to setup PCIe or interrupts; " + "continuing anyway\n"); + /* may be less than we wanted, if not enough available */ + dd->cspec->num_msix_entries = tabsize; + + /* setup interrupt handler */ + qib_setup_7322_interrupt(dd, 1); + + /* clear diagctrl register, in case diags were running and crashed */ + qib_write_kreg(dd, kr_hwdiagctrl, 0); + + goto bail; + +bail_cleanup: + qib_pcie_ddcleanup(dd); +bail_free: + qib_free_devdata(dd); + dd = ERR_PTR(ret); +bail: + return dd; +} + +/* + * Set the table entry at the specified index from the table specifed. + * There are 3 * TXDDS_TABLE_SZ entries in all per port, with the first + * TXDDS_TABLE_SZ for SDR, the next for DDR, and the last for QDR. + * 'idx' below addresses the correct entry, while its 4 LSBs select the + * corresponding entry (one of TXDDS_TABLE_SZ) from the selected table. + */ +#define DDS_ENT_AMP_LSB 14 +#define DDS_ENT_MAIN_LSB 9 +#define DDS_ENT_POST_LSB 5 +#define DDS_ENT_PRE_XTRA_LSB 3 +#define DDS_ENT_PRE_LSB 0 + +/* + * Set one entry in the TxDDS table for spec'd port + * ridx picks one of the entries, while tp points + * to the appropriate table entry. + */ +static void set_txdds(struct qib_pportdata *ppd, int ridx, + const struct txdds_ent *tp) +{ + struct qib_devdata *dd = ppd->dd; + u32 pack_ent; + int regidx; + + /* Get correct offset in chip-space, and in source table */ + regidx = KREG_IBPORT_IDX(IBSD_DDS_MAP_TABLE) + ridx; + /* + * We do not use qib_write_kreg_port() because it was intended + * only for registers in the lower "port specific" pages. + * So do index calculation by hand. + */ + if (ppd->hw_pidx) + regidx += (dd->palign / sizeof(u64)); + + pack_ent = tp->amp << DDS_ENT_AMP_LSB; + pack_ent |= tp->main << DDS_ENT_MAIN_LSB; + pack_ent |= tp->pre << DDS_ENT_PRE_LSB; + pack_ent |= tp->post << DDS_ENT_POST_LSB; + qib_write_kreg(dd, regidx, pack_ent); + /* Prevent back-to-back writes by hitting scratch */ + qib_write_kreg(ppd->dd, kr_scratch, 0); +} + +static const struct vendor_txdds_ent vendor_txdds[] = { + { /* Amphenol 1m 30awg NoEq */ + { 0x41, 0x50, 0x48 }, "584470002 ", + { 10, 0, 0, 5 }, { 10, 0, 0, 9 }, { 7, 1, 0, 13 }, + }, + { /* Amphenol 3m 28awg NoEq */ + { 0x41, 0x50, 0x48 }, "584470004 ", + { 0, 0, 0, 8 }, { 0, 0, 0, 11 }, { 0, 1, 7, 15 }, + }, + { /* Finisar 3m OM2 Optical */ + { 0x00, 0x90, 0x65 }, "FCBG410QB1C03-QL", + { 0, 0, 0, 3 }, { 0, 0, 0, 4 }, { 0, 0, 0, 13 }, + }, + { /* Finisar 30m OM2 Optical */ + { 0x00, 0x90, 0x65 }, "FCBG410QB1C30-QL", + { 0, 0, 0, 1 }, { 0, 0, 0, 5 }, { 0, 0, 0, 11 }, + }, + { /* Finisar Default OM2 Optical */ + { 0x00, 0x90, 0x65 }, NULL, + { 0, 0, 0, 2 }, { 0, 0, 0, 5 }, { 0, 0, 0, 12 }, + }, + { /* Gore 1m 30awg NoEq */ + { 0x00, 0x21, 0x77 }, "QSN3300-1 ", + { 0, 0, 0, 6 }, { 0, 0, 0, 9 }, { 0, 1, 0, 15 }, + }, + { /* Gore 2m 30awg NoEq */ + { 0x00, 0x21, 0x77 }, "QSN3300-2 ", + { 0, 0, 0, 8 }, { 0, 0, 0, 10 }, { 0, 1, 7, 15 }, + }, + { /* Gore 1m 28awg NoEq */ + { 0x00, 0x21, 0x77 }, "QSN3800-1 ", + { 0, 0, 0, 6 }, { 0, 0, 0, 8 }, { 0, 1, 0, 15 }, + }, + { /* Gore 3m 28awg NoEq */ + { 0x00, 0x21, 0x77 }, "QSN3800-3 ", + { 0, 0, 0, 9 }, { 0, 0, 0, 13 }, { 0, 1, 7, 15 }, + }, + { /* Gore 5m 24awg Eq */ + { 0x00, 0x21, 0x77 }, "QSN7000-5 ", + { 0, 0, 0, 7 }, { 0, 0, 0, 9 }, { 0, 1, 3, 15 }, + }, + { /* Gore 7m 24awg Eq */ + { 0x00, 0x21, 0x77 }, "QSN7000-7 ", + { 0, 0, 0, 9 }, { 0, 0, 0, 11 }, { 0, 2, 6, 15 }, + }, + { /* Gore 5m 26awg Eq */ + { 0x00, 0x21, 0x77 }, "QSN7600-5 ", + { 0, 0, 0, 8 }, { 0, 0, 0, 11 }, { 0, 1, 9, 13 }, + }, + { /* Gore 7m 26awg Eq */ + { 0x00, 0x21, 0x77 }, "QSN7600-7 ", + { 0, 0, 0, 8 }, { 0, 0, 0, 11 }, { 10, 1, 8, 15 }, + }, + { /* Intersil 12m 24awg Active */ + { 0x00, 0x30, 0xB4 }, "QLX4000CQSFP1224", + { 0, 0, 0, 2 }, { 0, 0, 0, 5 }, { 0, 3, 0, 9 }, + }, + { /* Intersil 10m 28awg Active */ + { 0x00, 0x30, 0xB4 }, "QLX4000CQSFP1028", + { 0, 0, 0, 6 }, { 0, 0, 0, 4 }, { 0, 2, 0, 2 }, + }, + { /* Intersil 7m 30awg Active */ + { 0x00, 0x30, 0xB4 }, "QLX4000CQSFP0730", + { 0, 0, 0, 6 }, { 0, 0, 0, 4 }, { 0, 1, 0, 3 }, + }, + { /* Intersil 5m 32awg Active */ + { 0x00, 0x30, 0xB4 }, "QLX4000CQSFP0532", + { 0, 0, 0, 6 }, { 0, 0, 0, 6 }, { 0, 2, 0, 8 }, + }, + { /* Intersil Default Active */ + { 0x00, 0x30, 0xB4 }, NULL, + { 0, 0, 0, 6 }, { 0, 0, 0, 5 }, { 0, 2, 0, 5 }, + }, + { /* Luxtera 20m Active Optical */ + { 0x00, 0x25, 0x63 }, NULL, + { 0, 0, 0, 5 }, { 0, 0, 0, 8 }, { 0, 2, 0, 12 }, + }, + { /* Molex 1M Cu loopback */ + { 0x00, 0x09, 0x3A }, "74763-0025 ", + { 2, 2, 6, 15 }, { 2, 2, 6, 15 }, { 2, 2, 6, 15 }, + }, + { /* Molex 2m 28awg NoEq */ + { 0x00, 0x09, 0x3A }, "74757-2201 ", + { 0, 0, 0, 6 }, { 0, 0, 0, 9 }, { 0, 1, 1, 15 }, + }, +}; + +static const struct txdds_ent txdds_sdr[TXDDS_TABLE_SZ] = { + /* amp, pre, main, post */ + { 2, 2, 15, 6 }, /* Loopback */ + { 0, 0, 0, 1 }, /* 2 dB */ + { 0, 0, 0, 2 }, /* 3 dB */ + { 0, 0, 0, 3 }, /* 4 dB */ + { 0, 0, 0, 4 }, /* 5 dB */ + { 0, 0, 0, 5 }, /* 6 dB */ + { 0, 0, 0, 6 }, /* 7 dB */ + { 0, 0, 0, 7 }, /* 8 dB */ + { 0, 0, 0, 8 }, /* 9 dB */ + { 0, 0, 0, 9 }, /* 10 dB */ + { 0, 0, 0, 10 }, /* 11 dB */ + { 0, 0, 0, 11 }, /* 12 dB */ + { 0, 0, 0, 12 }, /* 13 dB */ + { 0, 0, 0, 13 }, /* 14 dB */ + { 0, 0, 0, 14 }, /* 15 dB */ + { 0, 0, 0, 15 }, /* 16 dB */ +}; + +static const struct txdds_ent txdds_ddr[TXDDS_TABLE_SZ] = { + /* amp, pre, main, post */ + { 2, 2, 15, 6 }, /* Loopback */ + { 0, 0, 0, 8 }, /* 2 dB */ + { 0, 0, 0, 8 }, /* 3 dB */ + { 0, 0, 0, 9 }, /* 4 dB */ + { 0, 0, 0, 9 }, /* 5 dB */ + { 0, 0, 0, 10 }, /* 6 dB */ + { 0, 0, 0, 10 }, /* 7 dB */ + { 0, 0, 0, 11 }, /* 8 dB */ + { 0, 0, 0, 11 }, /* 9 dB */ + { 0, 0, 0, 12 }, /* 10 dB */ + { 0, 0, 0, 12 }, /* 11 dB */ + { 0, 0, 0, 13 }, /* 12 dB */ + { 0, 0, 0, 13 }, /* 13 dB */ + { 0, 0, 0, 14 }, /* 14 dB */ + { 0, 0, 0, 14 }, /* 15 dB */ + { 0, 0, 0, 15 }, /* 16 dB */ +}; + +static const struct txdds_ent txdds_qdr[TXDDS_TABLE_SZ] = { + /* amp, pre, main, post */ + { 2, 2, 15, 6 }, /* Loopback */ + { 0, 1, 0, 7 }, /* 2 dB (also QMH7342) */ + { 0, 1, 0, 9 }, /* 3 dB (also QMH7342) */ + { 0, 1, 0, 11 }, /* 4 dB */ + { 0, 1, 0, 13 }, /* 5 dB */ + { 0, 1, 0, 15 }, /* 6 dB */ + { 0, 1, 3, 15 }, /* 7 dB */ + { 0, 1, 7, 15 }, /* 8 dB */ + { 0, 1, 7, 15 }, /* 9 dB */ + { 0, 1, 8, 15 }, /* 10 dB */ + { 0, 1, 9, 15 }, /* 11 dB */ + { 0, 1, 10, 15 }, /* 12 dB */ + { 0, 2, 6, 15 }, /* 13 dB */ + { 0, 2, 7, 15 }, /* 14 dB */ + { 0, 2, 8, 15 }, /* 15 dB */ + { 0, 2, 9, 15 }, /* 16 dB */ +}; + +/* + * extra entries for use with txselect, for indices >= TXDDS_TABLE_SZ. + * These are mostly used for mez cards going through connectors + * and backplane traces, but can be used to add other "unusual" + * table values as well. + */ +static const struct txdds_ent txdds_extra_sdr[TXDDS_EXTRA_SZ] = { + /* amp, pre, main, post */ + { 0, 0, 0, 1 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 1 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 11 }, /* QME7342 backplane settings */ + { 0, 0, 0, 11 }, /* QME7342 backplane settings */ + { 0, 0, 0, 11 }, /* QME7342 backplane settings */ + { 0, 0, 0, 11 }, /* QME7342 backplane settings */ + { 0, 0, 0, 11 }, /* QME7342 backplane settings */ + { 0, 0, 0, 11 }, /* QME7342 backplane settings */ + { 0, 0, 0, 11 }, /* QME7342 backplane settings */ + { 0, 0, 0, 3 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 4 }, /* QMH7342 backplane settings */ +}; + +static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { + /* amp, pre, main, post */ + { 0, 0, 0, 7 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 7 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 13 }, /* QME7342 backplane settings */ + { 0, 0, 0, 13 }, /* QME7342 backplane settings */ + { 0, 0, 0, 13 }, /* QME7342 backplane settings */ + { 0, 0, 0, 13 }, /* QME7342 backplane settings */ + { 0, 0, 0, 13 }, /* QME7342 backplane settings */ + { 0, 0, 0, 13 }, /* QME7342 backplane settings */ + { 0, 0, 0, 13 }, /* QME7342 backplane settings */ + { 0, 0, 0, 9 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 10 }, /* QMH7342 backplane settings */ +}; + +static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { + /* amp, pre, main, post */ + { 0, 1, 0, 4 }, /* QMH7342 backplane settings */ + { 0, 1, 0, 5 }, /* QMH7342 backplane settings */ + { 0, 1, 0, 6 }, /* QMH7342 backplane settings */ + { 0, 1, 0, 8 }, /* QMH7342 backplane settings */ + { 0, 1, 12, 10 }, /* QME7342 backplane setting */ + { 0, 1, 12, 11 }, /* QME7342 backplane setting */ + { 0, 1, 12, 12 }, /* QME7342 backplane setting */ + { 0, 1, 12, 14 }, /* QME7342 backplane setting */ + { 0, 1, 12, 6 }, /* QME7342 backplane setting */ + { 0, 1, 12, 7 }, /* QME7342 backplane setting */ + { 0, 1, 12, 8 }, /* QME7342 backplane setting */ + { 0, 1, 0, 10 }, /* QMH7342 backplane settings */ + { 0, 1, 0, 12 }, /* QMH7342 backplane settings */ +}; + +static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = { + /* amp, pre, main, post */ + { 0, 0, 0, 0 }, /* QME7342 mfg settings */ + { 0, 0, 0, 6 }, /* QME7342 P2 mfg settings */ +}; + +static const struct txdds_ent *get_atten_table(const struct txdds_ent *txdds, + unsigned atten) +{ + /* + * The attenuation table starts at 2dB for entry 1, + * with entry 0 being the loopback entry. + */ + if (atten <= 2) + atten = 1; + else if (atten > TXDDS_TABLE_SZ) + atten = TXDDS_TABLE_SZ - 1; + else + atten--; + return txdds + atten; +} + +/* + * if override is set, the module parameter txselect has a value + * for this specific port, so use it, rather than our normal mechanism. + */ +static void find_best_ent(struct qib_pportdata *ppd, + const struct txdds_ent **sdr_dds, + const struct txdds_ent **ddr_dds, + const struct txdds_ent **qdr_dds, int override) +{ + struct qib_qsfp_cache *qd = &ppd->cpspec->qsfp_data.cache; + int idx; + + /* Search table of known cables */ + for (idx = 0; !override && idx < ARRAY_SIZE(vendor_txdds); ++idx) { + const struct vendor_txdds_ent *v = vendor_txdds + idx; + + if (!memcmp(v->oui, qd->oui, QSFP_VOUI_LEN) && + (!v->partnum || + !memcmp(v->partnum, qd->partnum, QSFP_PN_LEN))) { + *sdr_dds = &v->sdr; + *ddr_dds = &v->ddr; + *qdr_dds = &v->qdr; + return; + } + } + + /* Active cables don't have attenuation so we only set SERDES + * settings to account for the attenuation of the board traces. */ + if (!override && QSFP_IS_ACTIVE(qd->tech)) { + *sdr_dds = txdds_sdr + ppd->dd->board_atten; + *ddr_dds = txdds_ddr + ppd->dd->board_atten; + *qdr_dds = txdds_qdr + ppd->dd->board_atten; + return; + } + + if (!override && QSFP_HAS_ATTEN(qd->tech) && (qd->atten[0] || + qd->atten[1])) { + *sdr_dds = get_atten_table(txdds_sdr, qd->atten[0]); + *ddr_dds = get_atten_table(txdds_ddr, qd->atten[0]); + *qdr_dds = get_atten_table(txdds_qdr, qd->atten[1]); + return; + } else if (ppd->cpspec->no_eep < TXDDS_TABLE_SZ) { + /* + * If we have no (or incomplete) data from the cable + * EEPROM, or no QSFP, or override is set, use the + * module parameter value to index into the attentuation + * table. + */ + idx = ppd->cpspec->no_eep; + *sdr_dds = &txdds_sdr[idx]; + *ddr_dds = &txdds_ddr[idx]; + *qdr_dds = &txdds_qdr[idx]; + } else if (ppd->cpspec->no_eep < (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ)) { + /* similar to above, but index into the "extra" table. */ + idx = ppd->cpspec->no_eep - TXDDS_TABLE_SZ; + *sdr_dds = &txdds_extra_sdr[idx]; + *ddr_dds = &txdds_extra_ddr[idx]; + *qdr_dds = &txdds_extra_qdr[idx]; + } else if ((IS_QME(ppd->dd) || IS_QMH(ppd->dd)) && + ppd->cpspec->no_eep < (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + + TXDDS_MFG_SZ)) { + idx = ppd->cpspec->no_eep - (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ); + printk(KERN_INFO QIB_DRV_NAME + " IB%u:%u use idx %u into txdds_mfg\n", + ppd->dd->unit, ppd->port, idx); + *sdr_dds = &txdds_extra_mfg[idx]; + *ddr_dds = &txdds_extra_mfg[idx]; + *qdr_dds = &txdds_extra_mfg[idx]; + } else { + /* this shouldn't happen, it's range checked */ + *sdr_dds = txdds_sdr + qib_long_atten; + *ddr_dds = txdds_ddr + qib_long_atten; + *qdr_dds = txdds_qdr + qib_long_atten; + } +} + +static void init_txdds_table(struct qib_pportdata *ppd, int override) +{ + const struct txdds_ent *sdr_dds, *ddr_dds, *qdr_dds; + struct txdds_ent *dds; + int idx; + int single_ent = 0; + + find_best_ent(ppd, &sdr_dds, &ddr_dds, &qdr_dds, override); + + /* for mez cards or override, use the selected value for all entries */ + if (!(ppd->dd->flags & QIB_HAS_QSFP) || override) + single_ent = 1; + + /* Fill in the first entry with the best entry found. */ + set_txdds(ppd, 0, sdr_dds); + set_txdds(ppd, TXDDS_TABLE_SZ, ddr_dds); + set_txdds(ppd, 2 * TXDDS_TABLE_SZ, qdr_dds); + if (ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED | + QIBL_LINKACTIVE)) { + dds = (struct txdds_ent *)(ppd->link_speed_active == + QIB_IB_QDR ? qdr_dds : + (ppd->link_speed_active == + QIB_IB_DDR ? ddr_dds : sdr_dds)); + write_tx_serdes_param(ppd, dds); + } + + /* Fill in the remaining entries with the default table values. */ + for (idx = 1; idx < ARRAY_SIZE(txdds_sdr); ++idx) { + set_txdds(ppd, idx, single_ent ? sdr_dds : txdds_sdr + idx); + set_txdds(ppd, idx + TXDDS_TABLE_SZ, + single_ent ? ddr_dds : txdds_ddr + idx); + set_txdds(ppd, idx + 2 * TXDDS_TABLE_SZ, + single_ent ? qdr_dds : txdds_qdr + idx); + } +} + +#define KR_AHB_ACC KREG_IDX(ahb_access_ctrl) +#define KR_AHB_TRANS KREG_IDX(ahb_transaction_reg) +#define AHB_TRANS_RDY SYM_MASK(ahb_transaction_reg, ahb_rdy) +#define AHB_ADDR_LSB SYM_LSB(ahb_transaction_reg, ahb_address) +#define AHB_DATA_LSB SYM_LSB(ahb_transaction_reg, ahb_data) +#define AHB_WR SYM_MASK(ahb_transaction_reg, write_not_read) +#define AHB_TRANS_TRIES 10 + +/* + * The chan argument is 0=chan0, 1=chan1, 2=pll, 3=chan2, 4=chan4, + * 5=subsystem which is why most calls have "chan + chan >> 1" + * for the channel argument. + */ +static u32 ahb_mod(struct qib_devdata *dd, int quad, int chan, int addr, + u32 data, u32 mask) +{ + u32 rd_data, wr_data, sz_mask; + u64 trans, acc, prev_acc; + u32 ret = 0xBAD0BAD; + int tries; + + prev_acc = qib_read_kreg64(dd, KR_AHB_ACC); + /* From this point on, make sure we return access */ + acc = (quad << 1) | 1; + qib_write_kreg(dd, KR_AHB_ACC, acc); + + for (tries = 1; tries < AHB_TRANS_TRIES; ++tries) { + trans = qib_read_kreg64(dd, KR_AHB_TRANS); + if (trans & AHB_TRANS_RDY) + break; + } + if (tries >= AHB_TRANS_TRIES) { + qib_dev_err(dd, "No ahb_rdy in %d tries\n", AHB_TRANS_TRIES); + goto bail; + } + + /* If mask is not all 1s, we need to read, but different SerDes + * entities have different sizes + */ + sz_mask = (1UL << ((quad == 1) ? 32 : 16)) - 1; + wr_data = data & mask & sz_mask; + if ((~mask & sz_mask) != 0) { + trans = ((chan << 6) | addr) << (AHB_ADDR_LSB + 1); + qib_write_kreg(dd, KR_AHB_TRANS, trans); + + for (tries = 1; tries < AHB_TRANS_TRIES; ++tries) { + trans = qib_read_kreg64(dd, KR_AHB_TRANS); + if (trans & AHB_TRANS_RDY) + break; + } + if (tries >= AHB_TRANS_TRIES) { + qib_dev_err(dd, "No Rd ahb_rdy in %d tries\n", + AHB_TRANS_TRIES); + goto bail; + } + /* Re-read in case host split reads and read data first */ + trans = qib_read_kreg64(dd, KR_AHB_TRANS); + rd_data = (uint32_t)(trans >> AHB_DATA_LSB); + wr_data |= (rd_data & ~mask & sz_mask); + } + + /* If mask is not zero, we need to write. */ + if (mask & sz_mask) { + trans = ((chan << 6) | addr) << (AHB_ADDR_LSB + 1); + trans |= ((uint64_t)wr_data << AHB_DATA_LSB); + trans |= AHB_WR; + qib_write_kreg(dd, KR_AHB_TRANS, trans); + + for (tries = 1; tries < AHB_TRANS_TRIES; ++tries) { + trans = qib_read_kreg64(dd, KR_AHB_TRANS); + if (trans & AHB_TRANS_RDY) + break; + } + if (tries >= AHB_TRANS_TRIES) { + qib_dev_err(dd, "No Wr ahb_rdy in %d tries\n", + AHB_TRANS_TRIES); + goto bail; + } + } + ret = wr_data; +bail: + qib_write_kreg(dd, KR_AHB_ACC, prev_acc); + return ret; +} + +static void ibsd_wr_allchans(struct qib_pportdata *ppd, int addr, unsigned data, + unsigned mask) +{ + struct qib_devdata *dd = ppd->dd; + int chan; + u32 rbc; + + for (chan = 0; chan < SERDES_CHANS; ++chan) { + ahb_mod(dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), addr, + data, mask); + rbc = ahb_mod(dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + addr, 0, 0); + } +} + +static void serdes_7322_los_enable(struct qib_pportdata *ppd, int enable) +{ + u64 data = qib_read_kreg_port(ppd, krp_serdesctrl); + u8 state = SYM_FIELD(data, IBSerdesCtrl_0, RXLOSEN); + + if (enable && !state) { + printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS on\n", + ppd->dd->unit, ppd->port); + data |= SYM_MASK(IBSerdesCtrl_0, RXLOSEN); + } else if (!enable && state) { + printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS off\n", + ppd->dd->unit, ppd->port); + data &= ~SYM_MASK(IBSerdesCtrl_0, RXLOSEN); + } + qib_write_kreg_port(ppd, krp_serdesctrl, data); +} + +static int serdes_7322_init(struct qib_pportdata *ppd) +{ + int ret = 0; + if (ppd->dd->cspec->r1) + ret = serdes_7322_init_old(ppd); + else + ret = serdes_7322_init_new(ppd); + return ret; +} + +static int serdes_7322_init_old(struct qib_pportdata *ppd) +{ + u32 le_val; + + /* + * Initialize the Tx DDS tables. Also done every QSFP event, + * for adapters with QSFP + */ + init_txdds_table(ppd, 0); + + /* ensure no tx overrides from earlier driver loads */ + qib_write_kreg_port(ppd, krp_tx_deemph_override, + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + reset_tx_deemphasis_override)); + + /* Patch some SerDes defaults to "Better for IB" */ + /* Timing Loop Bandwidth: cdr_timing[11:9] = 0 */ + ibsd_wr_allchans(ppd, 2, 0, BMASK(11, 9)); + + /* Termination: rxtermctrl_r2d addr 11 bits [12:11] = 1 */ + ibsd_wr_allchans(ppd, 11, (1 << 11), BMASK(12, 11)); + /* Enable LE2: rxle2en_r2a addr 13 bit [6] = 1 */ + ibsd_wr_allchans(ppd, 13, (1 << 6), (1 << 6)); + + /* May be overridden in qsfp_7322_event */ + le_val = IS_QME(ppd->dd) ? LE2_QME : LE2_DEFAULT; + ibsd_wr_allchans(ppd, 13, (le_val << 7), BMASK(9, 7)); + + /* enable LE1 adaptation for all but QME, which is disabled */ + le_val = IS_QME(ppd->dd) ? 0 : 1; + ibsd_wr_allchans(ppd, 13, (le_val << 5), (1 << 5)); + + /* Clear cmode-override, may be set from older driver */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 0 << 14, 1 << 14); + + /* Timing Recovery: rxtapsel addr 5 bits [9:8] = 0 */ + ibsd_wr_allchans(ppd, 5, (0 << 8), BMASK(9, 8)); + + /* setup LoS params; these are subsystem, so chan == 5 */ + /* LoS filter threshold_count on, ch 0-3, set to 8 */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 5, 8 << 11, BMASK(14, 11)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 8 << 4, BMASK(7, 4)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 8, 8 << 11, BMASK(14, 11)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 8 << 4, BMASK(7, 4)); + + /* LoS filter threshold_count off, ch 0-3, set to 4 */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 6, 4 << 0, BMASK(3, 0)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 4 << 8, BMASK(11, 8)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 4 << 0, BMASK(3, 0)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 4 << 8, BMASK(11, 8)); + + /* LoS filter select enabled */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 1 << 15, 1 << 15); + + /* LoS target data: SDR=4, DDR=2, QDR=1 */ + ibsd_wr_allchans(ppd, 14, (1 << 3), BMASK(5, 3)); /* QDR */ + ibsd_wr_allchans(ppd, 20, (2 << 10), BMASK(12, 10)); /* DDR */ + ibsd_wr_allchans(ppd, 20, (4 << 13), BMASK(15, 13)); /* SDR */ + + serdes_7322_los_enable(ppd, 1); + + /* rxbistena; set 0 to avoid effects of it switch later */ + ibsd_wr_allchans(ppd, 9, 0 << 15, 1 << 15); + + /* Configure 4 DFE taps, and only they adapt */ + ibsd_wr_allchans(ppd, 16, 0 << 0, BMASK(1, 0)); + + /* gain hi stop 32 (22) (6:1) lo stop 7 (10:7) target 22 (13) (15:11) */ + le_val = (ppd->dd->cspec->r1 || IS_QME(ppd->dd)) ? 0xb6c0 : 0x6bac; + ibsd_wr_allchans(ppd, 21, le_val, 0xfffe); + + /* + * Set receive adaptation mode. SDR and DDR adaptation are + * always on, and QDR is initially enabled; later disabled. + */ + qib_write_kreg_port(ppd, krp_static_adapt_dis(0), 0ULL); + qib_write_kreg_port(ppd, krp_static_adapt_dis(1), 0ULL); + qib_write_kreg_port(ppd, krp_static_adapt_dis(2), + ppd->dd->cspec->r1 ? + QDR_STATIC_ADAPT_DOWN_R1 : QDR_STATIC_ADAPT_DOWN); + ppd->cpspec->qdr_dfe_on = 1; + + /* FLoop LOS gate: PPM filter enabled */ + ibsd_wr_allchans(ppd, 38, 0 << 10, 1 << 10); + + /* rx offset center enabled */ + ibsd_wr_allchans(ppd, 12, 1 << 4, 1 << 4); + + if (!ppd->dd->cspec->r1) { + ibsd_wr_allchans(ppd, 12, 1 << 12, 1 << 12); + ibsd_wr_allchans(ppd, 12, 2 << 8, 0x0f << 8); + } + + /* Set the frequency loop bandwidth to 15 */ + ibsd_wr_allchans(ppd, 2, 15 << 5, BMASK(8, 5)); + + return 0; +} + +static int serdes_7322_init_new(struct qib_pportdata *ppd) +{ + unsigned long tend; + u32 le_val, rxcaldone; + int chan, chan_done = (1 << SERDES_CHANS) - 1; + + /* Clear cmode-override, may be set from older driver */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 0 << 14, 1 << 14); + + /* ensure no tx overrides from earlier driver loads */ + qib_write_kreg_port(ppd, krp_tx_deemph_override, + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + reset_tx_deemphasis_override)); + + /* START OF LSI SUGGESTED SERDES BRINGUP */ + /* Reset - Calibration Setup */ + /* Stop DFE adaptaion */ + ibsd_wr_allchans(ppd, 1, 0, BMASK(9, 1)); + /* Disable LE1 */ + ibsd_wr_allchans(ppd, 13, 0, BMASK(5, 5)); + /* Disable autoadapt for LE1 */ + ibsd_wr_allchans(ppd, 1, 0, BMASK(15, 15)); + /* Disable LE2 */ + ibsd_wr_allchans(ppd, 13, 0, BMASK(6, 6)); + /* Disable VGA */ + ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0)); + /* Disable AFE Offset Cancel */ + ibsd_wr_allchans(ppd, 12, 0, BMASK(12, 12)); + /* Disable Timing Loop */ + ibsd_wr_allchans(ppd, 2, 0, BMASK(3, 3)); + /* Disable Frequency Loop */ + ibsd_wr_allchans(ppd, 2, 0, BMASK(4, 4)); + /* Disable Baseline Wander Correction */ + ibsd_wr_allchans(ppd, 13, 0, BMASK(13, 13)); + /* Disable RX Calibration */ + ibsd_wr_allchans(ppd, 4, 0, BMASK(10, 10)); + /* Disable RX Offset Calibration */ + ibsd_wr_allchans(ppd, 12, 0, BMASK(4, 4)); + /* Select BB CDR */ + ibsd_wr_allchans(ppd, 2, (1 << 15), BMASK(15, 15)); + /* CDR Step Size */ + ibsd_wr_allchans(ppd, 5, 0, BMASK(9, 8)); + /* Enable phase Calibration */ + ibsd_wr_allchans(ppd, 12, (1 << 5), BMASK(5, 5)); + /* DFE Bandwidth [2:14-12] */ + ibsd_wr_allchans(ppd, 2, (4 << 12), BMASK(14, 12)); + /* DFE Config (4 taps only) */ + ibsd_wr_allchans(ppd, 16, 0, BMASK(1, 0)); + /* Gain Loop Bandwidth */ + if (!ppd->dd->cspec->r1) { + ibsd_wr_allchans(ppd, 12, 1 << 12, BMASK(12, 12)); + ibsd_wr_allchans(ppd, 12, 2 << 8, BMASK(11, 8)); + } else { + ibsd_wr_allchans(ppd, 19, (3 << 11), BMASK(13, 11)); + } + /* Baseline Wander Correction Gain [13:4-0] (leave as default) */ + /* Baseline Wander Correction Gain [3:7-5] (leave as default) */ + /* Data Rate Select [5:7-6] (leave as default) */ + /* RX Parallel Word Width [3:10-8] (leave as default) */ + + /* RX REST */ + /* Single- or Multi-channel reset */ + /* RX Analog reset */ + /* RX Digital reset */ + ibsd_wr_allchans(ppd, 0, 0, BMASK(15, 13)); + msleep(20); + /* RX Analog reset */ + ibsd_wr_allchans(ppd, 0, (1 << 14), BMASK(14, 14)); + msleep(20); + /* RX Digital reset */ + ibsd_wr_allchans(ppd, 0, (1 << 13), BMASK(13, 13)); + msleep(20); + + /* setup LoS params; these are subsystem, so chan == 5 */ + /* LoS filter threshold_count on, ch 0-3, set to 8 */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 5, 8 << 11, BMASK(14, 11)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 8 << 4, BMASK(7, 4)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 8, 8 << 11, BMASK(14, 11)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 8 << 4, BMASK(7, 4)); + + /* LoS filter threshold_count off, ch 0-3, set to 4 */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 6, 4 << 0, BMASK(3, 0)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 4 << 8, BMASK(11, 8)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 4 << 0, BMASK(3, 0)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 4 << 8, BMASK(11, 8)); + + /* LoS filter select enabled */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 1 << 15, 1 << 15); + + /* LoS target data: SDR=4, DDR=2, QDR=1 */ + ibsd_wr_allchans(ppd, 14, (1 << 3), BMASK(5, 3)); /* QDR */ + ibsd_wr_allchans(ppd, 20, (2 << 10), BMASK(12, 10)); /* DDR */ + ibsd_wr_allchans(ppd, 20, (4 << 13), BMASK(15, 13)); /* SDR */ + + /* Turn on LOS on initial SERDES init */ + serdes_7322_los_enable(ppd, 1); + /* FLoop LOS gate: PPM filter enabled */ + ibsd_wr_allchans(ppd, 38, 0 << 10, 1 << 10); + + /* RX LATCH CALIBRATION */ + /* Enable Eyefinder Phase Calibration latch */ + ibsd_wr_allchans(ppd, 15, 1, BMASK(0, 0)); + /* Enable RX Offset Calibration latch */ + ibsd_wr_allchans(ppd, 12, (1 << 4), BMASK(4, 4)); + msleep(20); + /* Start Calibration */ + ibsd_wr_allchans(ppd, 4, (1 << 10), BMASK(10, 10)); + tend = jiffies + msecs_to_jiffies(500); + while (chan_done && !time_is_before_jiffies(tend)) { + msleep(20); + for (chan = 0; chan < SERDES_CHANS; ++chan) { + rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), + (chan + (chan >> 1)), + 25, 0, 0); + if ((~rxcaldone & (u32)BMASK(9, 9)) == 0 && + (~chan_done & (1 << chan)) == 0) + chan_done &= ~(1 << chan); + } + } + if (chan_done) { + printk(KERN_INFO QIB_DRV_NAME + " Serdes %d calibration not done after .5 sec: 0x%x\n", + IBSD(ppd->hw_pidx), chan_done); + } else { + for (chan = 0; chan < SERDES_CHANS; ++chan) { + rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), + (chan + (chan >> 1)), + 25, 0, 0); + if ((~rxcaldone & (u32)BMASK(10, 10)) == 0) + printk(KERN_INFO QIB_DRV_NAME + " Serdes %d chan %d calibration " + "failed\n", IBSD(ppd->hw_pidx), chan); + } + } + + /* Turn off Calibration */ + ibsd_wr_allchans(ppd, 4, 0, BMASK(10, 10)); + msleep(20); + + /* BRING RX UP */ + /* Set LE2 value (May be overridden in qsfp_7322_event) */ + le_val = IS_QME(ppd->dd) ? LE2_QME : LE2_DEFAULT; + ibsd_wr_allchans(ppd, 13, (le_val << 7), BMASK(9, 7)); + /* Set LE2 Loop bandwidth */ + ibsd_wr_allchans(ppd, 3, (7 << 5), BMASK(7, 5)); + /* Enable LE2 */ + ibsd_wr_allchans(ppd, 13, (1 << 6), BMASK(6, 6)); + msleep(20); + /* Enable H0 only */ + ibsd_wr_allchans(ppd, 1, 1, BMASK(9, 1)); + /* gain hi stop 32 (22) (6:1) lo stop 7 (10:7) target 22 (13) (15:11) */ + le_val = (ppd->dd->cspec->r1 || IS_QME(ppd->dd)) ? 0xb6c0 : 0x6bac; + ibsd_wr_allchans(ppd, 21, le_val, 0xfffe); + /* Enable VGA */ + ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0)); + msleep(20); + /* Set Frequency Loop Bandwidth */ + ibsd_wr_allchans(ppd, 2, (7 << 5), BMASK(8, 5)); + /* Enable Frequency Loop */ + ibsd_wr_allchans(ppd, 2, (1 << 4), BMASK(4, 4)); + /* Set Timing Loop Bandwidth */ + ibsd_wr_allchans(ppd, 2, 0, BMASK(11, 9)); + /* Enable Timing Loop */ + ibsd_wr_allchans(ppd, 2, (1 << 3), BMASK(3, 3)); + msleep(50); + /* Enable DFE + * Set receive adaptation mode. SDR and DDR adaptation are + * always on, and QDR is initially enabled; later disabled. + */ + qib_write_kreg_port(ppd, krp_static_adapt_dis(0), 0ULL); + qib_write_kreg_port(ppd, krp_static_adapt_dis(1), 0ULL); + qib_write_kreg_port(ppd, krp_static_adapt_dis(2), + ppd->dd->cspec->r1 ? + QDR_STATIC_ADAPT_DOWN_R1 : QDR_STATIC_ADAPT_DOWN); + ppd->cpspec->qdr_dfe_on = 1; + /* Disable LE1 */ + ibsd_wr_allchans(ppd, 13, (0 << 5), (1 << 5)); + /* Disable auto adapt for LE1 */ + ibsd_wr_allchans(ppd, 1, (0 << 15), BMASK(15, 15)); + msleep(20); + /* Enable AFE Offset Cancel */ + ibsd_wr_allchans(ppd, 12, (1 << 12), BMASK(12, 12)); + /* Enable Baseline Wander Correction */ + ibsd_wr_allchans(ppd, 12, (1 << 13), BMASK(13, 13)); + /* Termination: rxtermctrl_r2d addr 11 bits [12:11] = 1 */ + ibsd_wr_allchans(ppd, 11, (1 << 11), BMASK(12, 11)); + /* VGA output common mode */ + ibsd_wr_allchans(ppd, 12, (3 << 2), BMASK(3, 2)); + + /* + * Initialize the Tx DDS tables. Also done every QSFP event, + * for adapters with QSFP + */ + init_txdds_table(ppd, 0); + + return 0; +} + +/* start adjust QMH serdes parameters */ + +static void set_man_code(struct qib_pportdata *ppd, int chan, int code) +{ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + 9, code << 9, 0x3f << 9); +} + +static void set_man_mode_h1(struct qib_pportdata *ppd, int chan, + int enable, u32 tapenable) +{ + if (enable) + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + 1, 3 << 10, 0x1f << 10); + else + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + 1, 0, 0x1f << 10); +} + +/* Set clock to 1, 0, 1, 0 */ +static void clock_man(struct qib_pportdata *ppd, int chan) +{ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + 4, 0x4000, 0x4000); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + 4, 0, 0x4000); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + 4, 0x4000, 0x4000); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + 4, 0, 0x4000); +} + +/* + * write the current Tx serdes pre,post,main,amp settings into the serdes. + * The caller must pass the settings appropriate for the current speed, + * or not care if they are correct for the current speed. + */ +static void write_tx_serdes_param(struct qib_pportdata *ppd, + struct txdds_ent *txdds) +{ + u64 deemph; + + deemph = qib_read_kreg_port(ppd, krp_tx_deemph_override); + /* field names for amp, main, post, pre, respectively */ + deemph &= ~(SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, txampcntl_d2a) | + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, txc0_ena) | + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, txcp1_ena) | + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, txcn1_ena)); + + deemph |= SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + tx_override_deemphasis_select); + deemph |= (txdds->amp & SYM_RMASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txampcntl_d2a)) << SYM_LSB(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txampcntl_d2a); + deemph |= (txdds->main & SYM_RMASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txc0_ena)) << SYM_LSB(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txc0_ena); + deemph |= (txdds->post & SYM_RMASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txcp1_ena)) << SYM_LSB(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txcp1_ena); + deemph |= (txdds->pre & SYM_RMASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txcn1_ena)) << SYM_LSB(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txcn1_ena); + qib_write_kreg_port(ppd, krp_tx_deemph_override, deemph); +} + +/* + * Set the parameters for mez cards on link bounce, so they are + * always exactly what was requested. Similar logic to init_txdds + * but does just the serdes. + */ +static void adj_tx_serdes(struct qib_pportdata *ppd) +{ + const struct txdds_ent *sdr_dds, *ddr_dds, *qdr_dds; + struct txdds_ent *dds; + + find_best_ent(ppd, &sdr_dds, &ddr_dds, &qdr_dds, 1); + dds = (struct txdds_ent *)(ppd->link_speed_active == QIB_IB_QDR ? + qdr_dds : (ppd->link_speed_active == QIB_IB_DDR ? + ddr_dds : sdr_dds)); + write_tx_serdes_param(ppd, dds); +} + +/* set QDR forced value for H1, if needed */ +static void force_h1(struct qib_pportdata *ppd) +{ + int chan; + + ppd->cpspec->qdr_reforce = 0; + if (!ppd->dd->cspec->r1) + return; + + for (chan = 0; chan < SERDES_CHANS; chan++) { + set_man_mode_h1(ppd, chan, 1, 0); + set_man_code(ppd, chan, ppd->cpspec->h1_val); + clock_man(ppd, chan); + set_man_mode_h1(ppd, chan, 0, 0); + } +} + +#define SJA_EN SYM_MASK(SPC_JTAG_ACCESS_REG, SPC_JTAG_ACCESS_EN) +#define BISTEN_LSB SYM_LSB(SPC_JTAG_ACCESS_REG, bist_en) + +#define R_OPCODE_LSB 3 +#define R_OP_NOP 0 +#define R_OP_SHIFT 2 +#define R_OP_UPDATE 3 +#define R_TDI_LSB 2 +#define R_TDO_LSB 1 +#define R_RDY 1 + +static int qib_r_grab(struct qib_devdata *dd) +{ + u64 val; + val = SJA_EN; + qib_write_kreg(dd, kr_r_access, val); + qib_read_kreg32(dd, kr_scratch); + return 0; +} + +/* qib_r_wait_for_rdy() not only waits for the ready bit, it + * returns the current state of R_TDO + */ +static int qib_r_wait_for_rdy(struct qib_devdata *dd) +{ + u64 val; + int timeout; + for (timeout = 0; timeout < 100 ; ++timeout) { + val = qib_read_kreg32(dd, kr_r_access); + if (val & R_RDY) + return (val >> R_TDO_LSB) & 1; + } + return -1; +} + +static int qib_r_shift(struct qib_devdata *dd, int bisten, + int len, u8 *inp, u8 *outp) +{ + u64 valbase, val; + int ret, pos; + + valbase = SJA_EN | (bisten << BISTEN_LSB) | + (R_OP_SHIFT << R_OPCODE_LSB); + ret = qib_r_wait_for_rdy(dd); + if (ret < 0) + goto bail; + for (pos = 0; pos < len; ++pos) { + val = valbase; + if (outp) { + outp[pos >> 3] &= ~(1 << (pos & 7)); + outp[pos >> 3] |= (ret << (pos & 7)); + } + if (inp) { + int tdi = inp[pos >> 3] >> (pos & 7); + val |= ((tdi & 1) << R_TDI_LSB); + } + qib_write_kreg(dd, kr_r_access, val); + qib_read_kreg32(dd, kr_scratch); + ret = qib_r_wait_for_rdy(dd); + if (ret < 0) + break; + } + /* Restore to NOP between operations. */ + val = SJA_EN | (bisten << BISTEN_LSB); + qib_write_kreg(dd, kr_r_access, val); + qib_read_kreg32(dd, kr_scratch); + ret = qib_r_wait_for_rdy(dd); + + if (ret >= 0) + ret = pos; +bail: + return ret; +} + +static int qib_r_update(struct qib_devdata *dd, int bisten) +{ + u64 val; + int ret; + + val = SJA_EN | (bisten << BISTEN_LSB) | (R_OP_UPDATE << R_OPCODE_LSB); + ret = qib_r_wait_for_rdy(dd); + if (ret >= 0) { + qib_write_kreg(dd, kr_r_access, val); + qib_read_kreg32(dd, kr_scratch); + } + return ret; +} + +#define BISTEN_PORT_SEL 15 +#define LEN_PORT_SEL 625 +#define BISTEN_AT 17 +#define LEN_AT 156 +#define BISTEN_ETM 16 +#define LEN_ETM 632 + +#define BIT2BYTE(x) (((x) + BITS_PER_BYTE - 1) / BITS_PER_BYTE) + +/* these are common for all IB port use cases. */ +static u8 reset_at[BIT2BYTE(LEN_AT)] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, +}; +static u8 reset_atetm[BIT2BYTE(LEN_ETM)] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x80, 0xe3, 0x81, 0x73, 0x3c, 0x70, 0x8e, + 0x07, 0xce, 0xf1, 0xc0, 0x39, 0x1e, 0x38, 0xc7, 0x03, 0xe7, + 0x78, 0xe0, 0x1c, 0x0f, 0x9c, 0x7f, 0x80, 0x73, 0x0f, 0x70, + 0xde, 0x01, 0xce, 0x39, 0xc0, 0xf9, 0x06, 0x38, 0xd7, 0x00, + 0xe7, 0x19, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, +}; +static u8 at[BIT2BYTE(LEN_AT)] = { + 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, +}; + +/* used for IB1 or IB2, only one in use */ +static u8 atetm_1port[BIT2BYTE(LEN_ETM)] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x10, 0xf2, 0x80, 0x83, 0x1e, 0x38, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x50, 0xf4, 0x41, 0x00, 0x18, 0x78, 0xc8, 0x03, + 0x07, 0x7b, 0xa0, 0x3e, 0x00, 0x02, 0x00, 0x00, 0x18, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x00, 0x4b, 0x00, 0x00, 0x00, +}; + +/* used when both IB1 and IB2 are in use */ +static u8 atetm_2port[BIT2BYTE(LEN_ETM)] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79, + 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xf8, 0x80, 0x83, 0x1e, 0x38, 0xe0, 0x03, 0x05, + 0x7b, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, + 0xa2, 0x0f, 0x50, 0xf4, 0x41, 0x00, 0x18, 0x78, 0xd1, 0x07, + 0x02, 0x7c, 0x80, 0x3e, 0x00, 0x02, 0x00, 0x00, 0x3e, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, +}; + +/* used when only IB1 is in use */ +static u8 portsel_port1[BIT2BYTE(LEN_PORT_SEL)] = { + 0x32, 0x65, 0xa4, 0x7b, 0x10, 0x98, 0xdc, 0xfe, 0x13, 0x13, + 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x73, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, + 0x13, 0x78, 0x78, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, + 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x74, 0x32, + 0x32, 0x32, 0x32, 0x32, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, + 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, + 0x14, 0x14, 0x9f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +/* used when only IB2 is in use */ +static u8 portsel_port2[BIT2BYTE(LEN_PORT_SEL)] = { + 0x32, 0x65, 0xa4, 0x7b, 0x10, 0x98, 0xdc, 0xfe, 0x39, 0x39, + 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x73, 0x32, 0x32, 0x32, + 0x32, 0x32, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, + 0x39, 0x78, 0x78, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, + 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x74, 0x32, + 0x32, 0x32, 0x32, 0x32, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, + 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, + 0x3a, 0x3a, 0x9f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, +}; + +/* used when both IB1 and IB2 are in use */ +static u8 portsel_2port[BIT2BYTE(LEN_PORT_SEL)] = { + 0x32, 0xba, 0x54, 0x76, 0x10, 0x98, 0xdc, 0xfe, 0x13, 0x13, + 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x73, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, + 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, + 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x74, 0x32, + 0x32, 0x32, 0x32, 0x32, 0x14, 0x14, 0x14, 0x14, 0x14, 0x3a, + 0x3a, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, + 0x14, 0x14, 0x9f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +/* + * Do setup to properly handle IB link recovery; if port is zero, we + * are initializing to cover both ports; otherwise we are initializing + * to cover a single port card, or the port has reached INIT and we may + * need to switch coverage types. + */ +static void setup_7322_link_recovery(struct qib_pportdata *ppd, u32 both) +{ + u8 *portsel, *etm; + struct qib_devdata *dd = ppd->dd; + + if (!ppd->dd->cspec->r1) + return; + if (!both) { + dd->cspec->recovery_ports_initted++; + ppd->cpspec->recovery_init = 1; + } + if (!both && dd->cspec->recovery_ports_initted == 1) { + portsel = ppd->port == 1 ? portsel_port1 : portsel_port2; + etm = atetm_1port; + } else { + portsel = portsel_2port; + etm = atetm_2port; + } + + if (qib_r_grab(dd) < 0 || + qib_r_shift(dd, BISTEN_ETM, LEN_ETM, reset_atetm, NULL) < 0 || + qib_r_update(dd, BISTEN_ETM) < 0 || + qib_r_shift(dd, BISTEN_AT, LEN_AT, reset_at, NULL) < 0 || + qib_r_update(dd, BISTEN_AT) < 0 || + qib_r_shift(dd, BISTEN_PORT_SEL, LEN_PORT_SEL, + portsel, NULL) < 0 || + qib_r_update(dd, BISTEN_PORT_SEL) < 0 || + qib_r_shift(dd, BISTEN_AT, LEN_AT, at, NULL) < 0 || + qib_r_update(dd, BISTEN_AT) < 0 || + qib_r_shift(dd, BISTEN_ETM, LEN_ETM, etm, NULL) < 0 || + qib_r_update(dd, BISTEN_ETM) < 0) + qib_dev_err(dd, "Failed IB link recovery setup\n"); +} + +static void check_7322_rxe_status(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + u64 fmask; + + if (dd->cspec->recovery_ports_initted != 1) + return; /* rest doesn't apply to dualport */ + qib_write_kreg(dd, kr_control, dd->control | + SYM_MASK(Control, FreezeMode)); + (void)qib_read_kreg64(dd, kr_scratch); + udelay(3); /* ibcreset asserted 400ns, be sure that's over */ + fmask = qib_read_kreg64(dd, kr_act_fmask); + if (!fmask) { + /* + * require a powercycle before we'll work again, and make + * sure we get no more interrupts, and don't turn off + * freeze. + */ + ppd->dd->cspec->stay_in_freeze = 1; + qib_7322_set_intr_state(ppd->dd, 0); + qib_write_kreg(dd, kr_fmask, 0ULL); + qib_dev_err(dd, "HCA unusable until powercycled\n"); + return; /* eventually reset */ + } + + qib_write_kreg(ppd->dd, kr_hwerrclear, + SYM_MASK(HwErrClear, IBSerdesPClkNotDetectClear_1)); + + /* don't do the full clear_freeze(), not needed for this */ + qib_write_kreg(dd, kr_control, dd->control); + qib_read_kreg32(dd, kr_scratch); + /* take IBC out of reset */ + if (ppd->link_speed_supported) { + ppd->cpspec->ibcctrl_a &= + ~SYM_MASK(IBCCtrlA_0, IBStatIntReductionEn); + qib_write_kreg_port(ppd, krp_ibcctrl_a, + ppd->cpspec->ibcctrl_a); + qib_read_kreg32(dd, kr_scratch); + if (ppd->lflags & QIBL_IB_LINK_DISABLED) + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + } +} diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c new file mode 100644 index 00000000..cf0cd30a --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -0,0 +1,1588 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "qib.h" +#include "qib_common.h" + +/* + * min buffers we want to have per context, after driver + */ +#define QIB_MIN_USER_CTXT_BUFCNT 7 + +#define QLOGIC_IB_R_SOFTWARE_MASK 0xFF +#define QLOGIC_IB_R_SOFTWARE_SHIFT 24 +#define QLOGIC_IB_R_EMULATOR_MASK (1ULL<<62) + +/* + * Number of ctxts we are configured to use (to allow for more pio + * buffers per ctxt, etc.) Zero means use chip value. + */ +ushort qib_cfgctxts; +module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO); +MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use"); + +/* + * If set, do not write to any regs if avoidable, hack to allow + * check for deranged default register values. + */ +ushort qib_mini_init; +module_param_named(mini_init, qib_mini_init, ushort, S_IRUGO); +MODULE_PARM_DESC(mini_init, "If set, do minimal diag init"); + +unsigned qib_n_krcv_queues; +module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO); +MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port"); + +/* + * qib_wc_pat parameter: + * 0 is WC via MTRR + * 1 is WC via PAT + * If PAT initialization fails, code reverts back to MTRR + */ +unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */ +module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO); +MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism"); + +struct workqueue_struct *qib_cq_wq; + +static void verify_interrupt(unsigned long); + +static struct idr qib_unit_table; +u32 qib_cpulist_count; +unsigned long *qib_cpulist; + +/* set number of contexts we'll actually use */ +void qib_set_ctxtcnt(struct qib_devdata *dd) +{ + if (!qib_cfgctxts) { + dd->cfgctxts = dd->first_user_ctxt + num_online_cpus(); + if (dd->cfgctxts > dd->ctxtcnt) + dd->cfgctxts = dd->ctxtcnt; + } else if (qib_cfgctxts < dd->num_pports) + dd->cfgctxts = dd->ctxtcnt; + else if (qib_cfgctxts <= dd->ctxtcnt) + dd->cfgctxts = qib_cfgctxts; + else + dd->cfgctxts = dd->ctxtcnt; +} + +/* + * Common code for creating the receive context array. + */ +int qib_create_ctxts(struct qib_devdata *dd) +{ + unsigned i; + int ret; + + /* + * Allocate full ctxtcnt array, rather than just cfgctxts, because + * cleanup iterates across all possible ctxts. + */ + dd->rcd = kzalloc(sizeof(*dd->rcd) * dd->ctxtcnt, GFP_KERNEL); + if (!dd->rcd) { + qib_dev_err(dd, "Unable to allocate ctxtdata array, " + "failing\n"); + ret = -ENOMEM; + goto done; + } + + /* create (one or more) kctxt */ + for (i = 0; i < dd->first_user_ctxt; ++i) { + struct qib_pportdata *ppd; + struct qib_ctxtdata *rcd; + + if (dd->skip_kctxt_mask & (1 << i)) + continue; + + ppd = dd->pport + (i % dd->num_pports); + rcd = qib_create_ctxtdata(ppd, i); + if (!rcd) { + qib_dev_err(dd, "Unable to allocate ctxtdata" + " for Kernel ctxt, failing\n"); + ret = -ENOMEM; + goto done; + } + rcd->pkeys[0] = QIB_DEFAULT_P_KEY; + rcd->seq_cnt = 1; + } + ret = 0; +done: + return ret; +} + +/* + * Common code for user and kernel context setup. + */ +struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) +{ + struct qib_devdata *dd = ppd->dd; + struct qib_ctxtdata *rcd; + + rcd = kzalloc(sizeof(*rcd), GFP_KERNEL); + if (rcd) { + INIT_LIST_HEAD(&rcd->qp_wait_list); + rcd->ppd = ppd; + rcd->dd = dd; + rcd->cnt = 1; + rcd->ctxt = ctxt; + dd->rcd[ctxt] = rcd; + + dd->f_init_ctxt(rcd); + + /* + * To avoid wasting a lot of memory, we allocate 32KB chunks + * of physically contiguous memory, advance through it until + * used up and then allocate more. Of course, we need + * memory to store those extra pointers, now. 32KB seems to + * be the most that is "safe" under memory pressure + * (creating large files and then copying them over + * NFS while doing lots of MPI jobs). The OOM killer can + * get invoked, even though we say we can sleep and this can + * cause significant system problems.... + */ + rcd->rcvegrbuf_size = 0x8000; + rcd->rcvegrbufs_perchunk = + rcd->rcvegrbuf_size / dd->rcvegrbufsize; + rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt + + rcd->rcvegrbufs_perchunk - 1) / + rcd->rcvegrbufs_perchunk; + BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk)); + rcd->rcvegrbufs_perchunk_shift = + ilog2(rcd->rcvegrbufs_perchunk); + } + return rcd; +} + +/* + * Common code for initializing the physical port structure. + */ +void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, + u8 hw_pidx, u8 port) +{ + ppd->dd = dd; + ppd->hw_pidx = hw_pidx; + ppd->port = port; /* IB port number, not index */ + + spin_lock_init(&ppd->sdma_lock); + spin_lock_init(&ppd->lflags_lock); + init_waitqueue_head(&ppd->state_wait); + + init_timer(&ppd->symerr_clear_timer); + ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup; + ppd->symerr_clear_timer.data = (unsigned long)ppd; +} + +static int init_pioavailregs(struct qib_devdata *dd) +{ + int ret, pidx; + u64 *status_page; + + dd->pioavailregs_dma = dma_alloc_coherent( + &dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys, + GFP_KERNEL); + if (!dd->pioavailregs_dma) { + qib_dev_err(dd, "failed to allocate PIOavail reg area " + "in memory\n"); + ret = -ENOMEM; + goto done; + } + + /* + * We really want L2 cache aligned, but for current CPUs of + * interest, they are the same. + */ + status_page = (u64 *) + ((char *) dd->pioavailregs_dma + + ((2 * L1_CACHE_BYTES + + dd->pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES)); + /* device status comes first, for backwards compatibility */ + dd->devstatusp = status_page; + *status_page++ = 0; + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + dd->pport[pidx].statusp = status_page; + *status_page++ = 0; + } + + /* + * Setup buffer to hold freeze and other messages, accessible to + * apps, following statusp. This is per-unit, not per port. + */ + dd->freezemsg = (char *) status_page; + *dd->freezemsg = 0; + /* length of msg buffer is "whatever is left" */ + ret = (char *) status_page - (char *) dd->pioavailregs_dma; + dd->freezelen = PAGE_SIZE - ret; + + ret = 0; + +done: + return ret; +} + +/** + * init_shadow_tids - allocate the shadow TID array + * @dd: the qlogic_ib device + * + * allocate the shadow TID array, so we can qib_munlock previous + * entries. It may make more sense to move the pageshadow to the + * ctxt data structure, so we only allocate memory for ctxts actually + * in use, since we at 8k per ctxt, now. + * We don't want failures here to prevent use of the driver/chip, + * so no return value. + */ +static void init_shadow_tids(struct qib_devdata *dd) +{ + struct page **pages; + dma_addr_t *addrs; + + pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *)); + if (!pages) { + qib_dev_err(dd, "failed to allocate shadow page * " + "array, no expected sends!\n"); + goto bail; + } + + addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t)); + if (!addrs) { + qib_dev_err(dd, "failed to allocate shadow dma handle " + "array, no expected sends!\n"); + goto bail_free; + } + + dd->pageshadow = pages; + dd->physshadow = addrs; + return; + +bail_free: + vfree(pages); +bail: + dd->pageshadow = NULL; +} + +/* + * Do initialization for device that is only needed on + * first detect, not on resets. + */ +static int loadtime_init(struct qib_devdata *dd) +{ + int ret = 0; + + if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & + QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) { + qib_dev_err(dd, "Driver only handles version %d, " + "chip swversion is %d (%llx), failng\n", + QIB_CHIP_SWVERSION, + (int)(dd->revision >> + QLOGIC_IB_R_SOFTWARE_SHIFT) & + QLOGIC_IB_R_SOFTWARE_MASK, + (unsigned long long) dd->revision); + ret = -ENOSYS; + goto done; + } + + if (dd->revision & QLOGIC_IB_R_EMULATOR_MASK) + qib_devinfo(dd->pcidev, "%s", dd->boardversion); + + spin_lock_init(&dd->pioavail_lock); + spin_lock_init(&dd->sendctrl_lock); + spin_lock_init(&dd->uctxt_lock); + spin_lock_init(&dd->qib_diag_trans_lock); + spin_lock_init(&dd->eep_st_lock); + mutex_init(&dd->eep_lock); + + if (qib_mini_init) + goto done; + + ret = init_pioavailregs(dd); + init_shadow_tids(dd); + + qib_get_eeprom_info(dd); + + /* setup time (don't start yet) to verify we got interrupt */ + init_timer(&dd->intrchk_timer); + dd->intrchk_timer.function = verify_interrupt; + dd->intrchk_timer.data = (unsigned long) dd; + +done: + return ret; +} + +/** + * init_after_reset - re-initialize after a reset + * @dd: the qlogic_ib device + * + * sanity check at least some of the values after reset, and + * ensure no receive or transmit (explicitly, in case reset + * failed + */ +static int init_after_reset(struct qib_devdata *dd) +{ + int i; + + /* + * Ensure chip does no sends or receives, tail updates, or + * pioavail updates while we re-initialize. This is mostly + * for the driver data structures, not chip registers. + */ + for (i = 0; i < dd->num_pports; ++i) { + /* + * ctxt == -1 means "all contexts". Only really safe for + * _dis_abling things, as here. + */ + dd->f_rcvctrl(dd->pport + i, QIB_RCVCTRL_CTXT_DIS | + QIB_RCVCTRL_INTRAVAIL_DIS | + QIB_RCVCTRL_TAILUPD_DIS, -1); + /* Redundant across ports for some, but no big deal. */ + dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_DIS | + QIB_SENDCTRL_AVAIL_DIS); + } + + return 0; +} + +static void enable_chip(struct qib_devdata *dd) +{ + u64 rcvmask; + int i; + + /* + * Enable PIO send, and update of PIOavail regs to memory. + */ + for (i = 0; i < dd->num_pports; ++i) + dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_ENB | + QIB_SENDCTRL_AVAIL_ENB); + /* + * Enable kernel ctxts' receive and receive interrupt. + * Other ctxts done as user opens and inits them. + */ + rcvmask = QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_INTRAVAIL_ENB; + rcvmask |= (dd->flags & QIB_NODMA_RTAIL) ? + QIB_RCVCTRL_TAILUPD_DIS : QIB_RCVCTRL_TAILUPD_ENB; + for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) { + struct qib_ctxtdata *rcd = dd->rcd[i]; + + if (rcd) + dd->f_rcvctrl(rcd->ppd, rcvmask, i); + } + dd->freectxts = dd->cfgctxts - dd->first_user_ctxt; +} + +static void verify_interrupt(unsigned long opaque) +{ + struct qib_devdata *dd = (struct qib_devdata *) opaque; + + if (!dd) + return; /* being torn down */ + + /* + * If we don't have a lid or any interrupts, let the user know and + * don't bother checking again. + */ + if (dd->int_counter == 0) { + if (!dd->f_intr_fallback(dd)) + dev_err(&dd->pcidev->dev, "No interrupts detected, " + "not usable.\n"); + else /* re-arm the timer to see if fallback works */ + mod_timer(&dd->intrchk_timer, jiffies + HZ/2); + } +} + +static void init_piobuf_state(struct qib_devdata *dd) +{ + int i, pidx; + u32 uctxts; + + /* + * Ensure all buffers are free, and fifos empty. Buffers + * are common, so only do once for port 0. + * + * After enable and qib_chg_pioavailkernel so we can safely + * enable pioavail updates and PIOENABLE. After this, packets + * are ready and able to go out. + */ + dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_ALL); + for (pidx = 0; pidx < dd->num_pports; ++pidx) + dd->f_sendctrl(dd->pport + pidx, QIB_SENDCTRL_FLUSH); + + /* + * If not all sendbufs are used, add the one to each of the lower + * numbered contexts. pbufsctxt and lastctxt_piobuf are + * calculated in chip-specific code because it may cause some + * chip-specific adjustments to be made. + */ + uctxts = dd->cfgctxts - dd->first_user_ctxt; + dd->ctxts_extrabuf = dd->pbufsctxt ? + dd->lastctxt_piobuf - (dd->pbufsctxt * uctxts) : 0; + + /* + * Set up the shadow copies of the piobufavail registers, + * which we compare against the chip registers for now, and + * the in memory DMA'ed copies of the registers. + * By now pioavail updates to memory should have occurred, so + * copy them into our working/shadow registers; this is in + * case something went wrong with abort, but mostly to get the + * initial values of the generation bit correct. + */ + for (i = 0; i < dd->pioavregs; i++) { + __le64 tmp; + + tmp = dd->pioavailregs_dma[i]; + /* + * Don't need to worry about pioavailkernel here + * because we will call qib_chg_pioavailkernel() later + * in initialization, to busy out buffers as needed. + */ + dd->pioavailshadow[i] = le64_to_cpu(tmp); + } + while (i < ARRAY_SIZE(dd->pioavailshadow)) + dd->pioavailshadow[i++] = 0; /* for debugging sanity */ + + /* after pioavailshadow is setup */ + qib_chg_pioavailkernel(dd, 0, dd->piobcnt2k + dd->piobcnt4k, + TXCHK_CHG_TYPE_KERN, NULL); + dd->f_initvl15_bufs(dd); +} + +/** + * qib_init - do the actual initialization sequence on the chip + * @dd: the qlogic_ib device + * @reinit: reinitializing, so don't allocate new memory + * + * Do the actual initialization sequence on the chip. This is done + * both from the init routine called from the PCI infrastructure, and + * when we reset the chip, or detect that it was reset internally, + * or it's administratively re-enabled. + * + * Memory allocation here and in called routines is only done in + * the first case (reinit == 0). We have to be careful, because even + * without memory allocation, we need to re-write all the chip registers + * TIDs, etc. after the reset or enable has completed. + */ +int qib_init(struct qib_devdata *dd, int reinit) +{ + int ret = 0, pidx, lastfail = 0; + u32 portok = 0; + unsigned i; + struct qib_ctxtdata *rcd; + struct qib_pportdata *ppd; + unsigned long flags; + + /* Set linkstate to unknown, so we can watch for a transition. */ + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~(QIBL_LINKACTIVE | QIBL_LINKARMED | + QIBL_LINKDOWN | QIBL_LINKINIT | + QIBL_LINKV); + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } + + if (reinit) + ret = init_after_reset(dd); + else + ret = loadtime_init(dd); + if (ret) + goto done; + + /* Bypass most chip-init, to get to device creation */ + if (qib_mini_init) + return 0; + + ret = dd->f_late_initreg(dd); + if (ret) + goto done; + + /* dd->rcd can be NULL if early init failed */ + for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) { + /* + * Set up the (kernel) rcvhdr queue and egr TIDs. If doing + * re-init, the simplest way to handle this is to free + * existing, and re-allocate. + * Need to re-create rest of ctxt 0 ctxtdata as well. + */ + rcd = dd->rcd[i]; + if (!rcd) + continue; + + lastfail = qib_create_rcvhdrq(dd, rcd); + if (!lastfail) + lastfail = qib_setup_eagerbufs(rcd); + if (lastfail) { + qib_dev_err(dd, "failed to allocate kernel ctxt's " + "rcvhdrq and/or egr bufs\n"); + continue; + } + } + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + int mtu; + if (lastfail) + ret = lastfail; + ppd = dd->pport + pidx; + mtu = ib_mtu_enum_to_int(qib_ibmtu); + if (mtu == -1) { + mtu = QIB_DEFAULT_MTU; + qib_ibmtu = 0; /* don't leave invalid value */ + } + /* set max we can ever have for this driver load */ + ppd->init_ibmaxlen = min(mtu > 2048 ? + dd->piosize4k : dd->piosize2k, + dd->rcvegrbufsize + + (dd->rcvhdrentsize << 2)); + /* + * Have to initialize ibmaxlen, but this will normally + * change immediately in qib_set_mtu(). + */ + ppd->ibmaxlen = ppd->init_ibmaxlen; + qib_set_mtu(ppd, mtu); + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_LINK_DISABLED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + + lastfail = dd->f_bringup_serdes(ppd); + if (lastfail) { + qib_devinfo(dd->pcidev, + "Failed to bringup IB port %u\n", ppd->port); + lastfail = -ENETDOWN; + continue; + } + + portok++; + } + + if (!portok) { + /* none of the ports initialized */ + if (!ret && lastfail) + ret = lastfail; + else if (!ret) + ret = -ENETDOWN; + /* but continue on, so we can debug cause */ + } + + enable_chip(dd); + + init_piobuf_state(dd); + +done: + if (!ret) { + /* chip is OK for user apps; mark it as initialized */ + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + /* + * Set status even if port serdes is not initialized + * so that diags will work. + */ + *ppd->statusp |= QIB_STATUS_CHIP_PRESENT | + QIB_STATUS_INITTED; + if (!ppd->link_speed_enabled) + continue; + if (dd->flags & QIB_HAS_SEND_DMA) + ret = qib_setup_sdma(ppd); + init_timer(&ppd->hol_timer); + ppd->hol_timer.function = qib_hol_event; + ppd->hol_timer.data = (unsigned long)ppd; + ppd->hol_state = QIB_HOL_UP; + } + + /* now we can enable all interrupts from the chip */ + dd->f_set_intr_state(dd, 1); + + /* + * Setup to verify we get an interrupt, and fallback + * to an alternate if necessary and possible. + */ + mod_timer(&dd->intrchk_timer, jiffies + HZ/2); + /* start stats retrieval timer */ + mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER); + } + + /* if ret is non-zero, we probably should do some cleanup here... */ + return ret; +} + +/* + * These next two routines are placeholders in case we don't have per-arch + * code for controlling write combining. If explicit control of write + * combining is not available, performance will probably be awful. + */ + +int __attribute__((weak)) qib_enable_wc(struct qib_devdata *dd) +{ + return -EOPNOTSUPP; +} + +void __attribute__((weak)) qib_disable_wc(struct qib_devdata *dd) +{ +} + +static inline struct qib_devdata *__qib_lookup(int unit) +{ + return idr_find(&qib_unit_table, unit); +} + +struct qib_devdata *qib_lookup(int unit) +{ + struct qib_devdata *dd; + unsigned long flags; + + spin_lock_irqsave(&qib_devs_lock, flags); + dd = __qib_lookup(unit); + spin_unlock_irqrestore(&qib_devs_lock, flags); + + return dd; +} + +/* + * Stop the timers during unit shutdown, or after an error late + * in initialization. + */ +static void qib_stop_timers(struct qib_devdata *dd) +{ + struct qib_pportdata *ppd; + int pidx; + + if (dd->stats_timer.data) { + del_timer_sync(&dd->stats_timer); + dd->stats_timer.data = 0; + } + if (dd->intrchk_timer.data) { + del_timer_sync(&dd->intrchk_timer); + dd->intrchk_timer.data = 0; + } + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (ppd->hol_timer.data) + del_timer_sync(&ppd->hol_timer); + if (ppd->led_override_timer.data) { + del_timer_sync(&ppd->led_override_timer); + atomic_set(&ppd->led_override_timer_active, 0); + } + if (ppd->symerr_clear_timer.data) + del_timer_sync(&ppd->symerr_clear_timer); + } +} + +/** + * qib_shutdown_device - shut down a device + * @dd: the qlogic_ib device + * + * This is called to make the device quiet when we are about to + * unload the driver, and also when the device is administratively + * disabled. It does not free any data structures. + * Everything it does has to be setup again by qib_init(dd, 1) + */ +static void qib_shutdown_device(struct qib_devdata *dd) +{ + struct qib_pportdata *ppd; + unsigned pidx; + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + + spin_lock_irq(&ppd->lflags_lock); + ppd->lflags &= ~(QIBL_LINKDOWN | QIBL_LINKINIT | + QIBL_LINKARMED | QIBL_LINKACTIVE | + QIBL_LINKV); + spin_unlock_irq(&ppd->lflags_lock); + *ppd->statusp &= ~(QIB_STATUS_IB_CONF | QIB_STATUS_IB_READY); + } + dd->flags &= ~QIB_INITTED; + + /* mask interrupts, but not errors */ + dd->f_set_intr_state(dd, 0); + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + dd->f_rcvctrl(ppd, QIB_RCVCTRL_TAILUPD_DIS | + QIB_RCVCTRL_CTXT_DIS | + QIB_RCVCTRL_INTRAVAIL_DIS | + QIB_RCVCTRL_PKEY_ENB, -1); + /* + * Gracefully stop all sends allowing any in progress to + * trickle out first. + */ + dd->f_sendctrl(ppd, QIB_SENDCTRL_CLEAR); + } + + /* + * Enough for anything that's going to trickle out to have actually + * done so. + */ + udelay(20); + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + dd->f_setextled(ppd, 0); /* make sure LEDs are off */ + + if (dd->flags & QIB_HAS_SEND_DMA) + qib_teardown_sdma(ppd); + + dd->f_sendctrl(ppd, QIB_SENDCTRL_AVAIL_DIS | + QIB_SENDCTRL_SEND_DIS); + /* + * Clear SerdesEnable. + * We can't count on interrupts since we are stopping. + */ + dd->f_quiet_serdes(ppd); + } + + qib_update_eeprom_log(dd); +} + +/** + * qib_free_ctxtdata - free a context's allocated data + * @dd: the qlogic_ib device + * @rcd: the ctxtdata structure + * + * free up any allocated data for a context + * This should not touch anything that would affect a simultaneous + * re-allocation of context data, because it is called after qib_mutex + * is released (and can be called from reinit as well). + * It should never change any chip state, or global driver state. + */ +void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd) +{ + if (!rcd) + return; + + if (rcd->rcvhdrq) { + dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size, + rcd->rcvhdrq, rcd->rcvhdrq_phys); + rcd->rcvhdrq = NULL; + if (rcd->rcvhdrtail_kvaddr) { + dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, + rcd->rcvhdrtail_kvaddr, + rcd->rcvhdrqtailaddr_phys); + rcd->rcvhdrtail_kvaddr = NULL; + } + } + if (rcd->rcvegrbuf) { + unsigned e; + + for (e = 0; e < rcd->rcvegrbuf_chunks; e++) { + void *base = rcd->rcvegrbuf[e]; + size_t size = rcd->rcvegrbuf_size; + + dma_free_coherent(&dd->pcidev->dev, size, + base, rcd->rcvegrbuf_phys[e]); + } + kfree(rcd->rcvegrbuf); + rcd->rcvegrbuf = NULL; + kfree(rcd->rcvegrbuf_phys); + rcd->rcvegrbuf_phys = NULL; + rcd->rcvegrbuf_chunks = 0; + } + + kfree(rcd->tid_pg_list); + vfree(rcd->user_event_mask); + vfree(rcd->subctxt_uregbase); + vfree(rcd->subctxt_rcvegrbuf); + vfree(rcd->subctxt_rcvhdr_base); + kfree(rcd); +} + +/* + * Perform a PIO buffer bandwidth write test, to verify proper system + * configuration. Even when all the setup calls work, occasionally + * BIOS or other issues can prevent write combining from working, or + * can cause other bandwidth problems to the chip. + * + * This test simply writes the same buffer over and over again, and + * measures close to the peak bandwidth to the chip (not testing + * data bandwidth to the wire). On chips that use an address-based + * trigger to send packets to the wire, this is easy. On chips that + * use a count to trigger, we want to make sure that the packet doesn't + * go out on the wire, or trigger flow control checks. + */ +static void qib_verify_pioperf(struct qib_devdata *dd) +{ + u32 pbnum, cnt, lcnt; + u32 __iomem *piobuf; + u32 *addr; + u64 msecs, emsecs; + + piobuf = dd->f_getsendbuf(dd->pport, 0ULL, &pbnum); + if (!piobuf) { + qib_devinfo(dd->pcidev, + "No PIObufs for checking perf, skipping\n"); + return; + } + + /* + * Enough to give us a reasonable test, less than piobuf size, and + * likely multiple of store buffer length. + */ + cnt = 1024; + + addr = vmalloc(cnt); + if (!addr) { + qib_devinfo(dd->pcidev, + "Couldn't get memory for checking PIO perf," + " skipping\n"); + goto done; + } + + preempt_disable(); /* we want reasonably accurate elapsed time */ + msecs = 1 + jiffies_to_msecs(jiffies); + for (lcnt = 0; lcnt < 10000U; lcnt++) { + /* wait until we cross msec boundary */ + if (jiffies_to_msecs(jiffies) >= msecs) + break; + udelay(1); + } + + dd->f_set_armlaunch(dd, 0); + + /* + * length 0, no dwords actually sent + */ + writeq(0, piobuf); + qib_flush_wc(); + + /* + * This is only roughly accurate, since even with preempt we + * still take interrupts that could take a while. Running for + * >= 5 msec seems to get us "close enough" to accurate values. + */ + msecs = jiffies_to_msecs(jiffies); + for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) { + qib_pio_copy(piobuf + 64, addr, cnt >> 2); + emsecs = jiffies_to_msecs(jiffies) - msecs; + } + + /* 1 GiB/sec, slightly over IB SDR line rate */ + if (lcnt < (emsecs * 1024U)) + qib_dev_err(dd, + "Performance problem: bandwidth to PIO buffers is " + "only %u MiB/sec\n", + lcnt / (u32) emsecs); + + preempt_enable(); + + vfree(addr); + +done: + /* disarm piobuf, so it's available again */ + dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(pbnum)); + qib_sendbuf_done(dd, pbnum); + dd->f_set_armlaunch(dd, 1); +} + + +void qib_free_devdata(struct qib_devdata *dd) +{ + unsigned long flags; + + spin_lock_irqsave(&qib_devs_lock, flags); + idr_remove(&qib_unit_table, dd->unit); + list_del(&dd->list); + spin_unlock_irqrestore(&qib_devs_lock, flags); + + ib_dealloc_device(&dd->verbs_dev.ibdev); +} + +/* + * Allocate our primary per-unit data structure. Must be done via verbs + * allocator, because the verbs cleanup process both does cleanup and + * free of the data structure. + * "extra" is for chip-specific data. + * + * Use the idr mechanism to get a unit number for this unit. + */ +struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) +{ + unsigned long flags; + struct qib_devdata *dd; + int ret; + + if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) { + dd = ERR_PTR(-ENOMEM); + goto bail; + } + + dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); + if (!dd) { + dd = ERR_PTR(-ENOMEM); + goto bail; + } + + spin_lock_irqsave(&qib_devs_lock, flags); + ret = idr_get_new(&qib_unit_table, dd, &dd->unit); + if (ret >= 0) + list_add(&dd->list, &qib_dev_list); + spin_unlock_irqrestore(&qib_devs_lock, flags); + + if (ret < 0) { + qib_early_err(&pdev->dev, + "Could not allocate unit ID: error %d\n", -ret); + ib_dealloc_device(&dd->verbs_dev.ibdev); + dd = ERR_PTR(ret); + goto bail; + } + + if (!qib_cpulist_count) { + u32 count = num_online_cpus(); + qib_cpulist = kzalloc(BITS_TO_LONGS(count) * + sizeof(long), GFP_KERNEL); + if (qib_cpulist) + qib_cpulist_count = count; + else + qib_early_err(&pdev->dev, "Could not alloc cpulist " + "info, cpu affinity might be wrong\n"); + } + +bail: + return dd; +} + +/* + * Called from freeze mode handlers, and from PCI error + * reporting code. Should be paranoid about state of + * system and data structures. + */ +void qib_disable_after_error(struct qib_devdata *dd) +{ + if (dd->flags & QIB_INITTED) { + u32 pidx; + + dd->flags &= ~QIB_INITTED; + if (dd->pport) + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + struct qib_pportdata *ppd; + + ppd = dd->pport + pidx; + if (dd->flags & QIB_PRESENT) { + qib_set_linkstate(ppd, + QIB_IB_LINKDOWN_DISABLE); + dd->f_setextled(ppd, 0); + } + *ppd->statusp &= ~QIB_STATUS_IB_READY; + } + } + + /* + * Mark as having had an error for driver, and also + * for /sys and status word mapped to user programs. + * This marks unit as not usable, until reset. + */ + if (dd->devstatusp) + *dd->devstatusp |= QIB_STATUS_HWERROR; +} + +static void __devexit qib_remove_one(struct pci_dev *); +static int __devinit qib_init_one(struct pci_dev *, + const struct pci_device_id *); + +#define DRIVER_LOAD_MSG "QLogic " QIB_DRV_NAME " loaded: " +#define PFX QIB_DRV_NAME ": " + +static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = { + { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_QLOGIC_IB_6120) }, + { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7220) }, + { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7322) }, + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, qib_pci_tbl); + +struct pci_driver qib_driver = { + .name = QIB_DRV_NAME, + .probe = qib_init_one, + .remove = __devexit_p(qib_remove_one), + .id_table = qib_pci_tbl, + .err_handler = &qib_pci_err_handler, +}; + +/* + * Do all the generic driver unit- and chip-independent memory + * allocation and initialization. + */ +static int __init qlogic_ib_init(void) +{ + int ret; + + ret = qib_dev_init(); + if (ret) + goto bail; + + qib_cq_wq = create_singlethread_workqueue("qib_cq"); + if (!qib_cq_wq) { + ret = -ENOMEM; + goto bail_dev; + } + + /* + * These must be called before the driver is registered with + * the PCI subsystem. + */ + idr_init(&qib_unit_table); + if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) { + printk(KERN_ERR QIB_DRV_NAME ": idr_pre_get() failed\n"); + ret = -ENOMEM; + goto bail_cq_wq; + } + + ret = pci_register_driver(&qib_driver); + if (ret < 0) { + printk(KERN_ERR QIB_DRV_NAME + ": Unable to register driver: error %d\n", -ret); + goto bail_unit; + } + + /* not fatal if it doesn't work */ + if (qib_init_qibfs()) + printk(KERN_ERR QIB_DRV_NAME ": Unable to register ipathfs\n"); + goto bail; /* all OK */ + +bail_unit: + idr_destroy(&qib_unit_table); +bail_cq_wq: + destroy_workqueue(qib_cq_wq); +bail_dev: + qib_dev_cleanup(); +bail: + return ret; +} + +module_init(qlogic_ib_init); + +/* + * Do the non-unit driver cleanup, memory free, etc. at unload. + */ +static void __exit qlogic_ib_cleanup(void) +{ + int ret; + + ret = qib_exit_qibfs(); + if (ret) + printk(KERN_ERR QIB_DRV_NAME ": " + "Unable to cleanup counter filesystem: " + "error %d\n", -ret); + + pci_unregister_driver(&qib_driver); + + destroy_workqueue(qib_cq_wq); + + qib_cpulist_count = 0; + kfree(qib_cpulist); + + idr_destroy(&qib_unit_table); + qib_dev_cleanup(); +} + +module_exit(qlogic_ib_cleanup); + +/* this can only be called after a successful initialization */ +static void cleanup_device_data(struct qib_devdata *dd) +{ + int ctxt; + int pidx; + struct qib_ctxtdata **tmp; + unsigned long flags; + + /* users can't do anything more with chip */ + for (pidx = 0; pidx < dd->num_pports; ++pidx) + if (dd->pport[pidx].statusp) + *dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT; + + if (!qib_wc_pat) + qib_disable_wc(dd); + + if (dd->pioavailregs_dma) { + dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, + (void *) dd->pioavailregs_dma, + dd->pioavailregs_phys); + dd->pioavailregs_dma = NULL; + } + + if (dd->pageshadow) { + struct page **tmpp = dd->pageshadow; + dma_addr_t *tmpd = dd->physshadow; + int i, cnt = 0; + + for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) { + int ctxt_tidbase = ctxt * dd->rcvtidcnt; + int maxtid = ctxt_tidbase + dd->rcvtidcnt; + + for (i = ctxt_tidbase; i < maxtid; i++) { + if (!tmpp[i]) + continue; + pci_unmap_page(dd->pcidev, tmpd[i], + PAGE_SIZE, PCI_DMA_FROMDEVICE); + qib_release_user_pages(&tmpp[i], 1); + tmpp[i] = NULL; + cnt++; + } + } + + tmpp = dd->pageshadow; + dd->pageshadow = NULL; + vfree(tmpp); + } + + /* + * Free any resources still in use (usually just kernel contexts) + * at unload; we do for ctxtcnt, because that's what we allocate. + * We acquire lock to be really paranoid that rcd isn't being + * accessed from some interrupt-related code (that should not happen, + * but best to be sure). + */ + spin_lock_irqsave(&dd->uctxt_lock, flags); + tmp = dd->rcd; + dd->rcd = NULL; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + for (ctxt = 0; tmp && ctxt < dd->ctxtcnt; ctxt++) { + struct qib_ctxtdata *rcd = tmp[ctxt]; + + tmp[ctxt] = NULL; /* debugging paranoia */ + qib_free_ctxtdata(dd, rcd); + } + kfree(tmp); + kfree(dd->boardname); +} + +/* + * Clean up on unit shutdown, or error during unit load after + * successful initialization. + */ +static void qib_postinit_cleanup(struct qib_devdata *dd) +{ + /* + * Clean up chip-specific stuff. + * We check for NULL here, because it's outside + * the kregbase check, and we need to call it + * after the free_irq. Thus it's possible that + * the function pointers were never initialized. + */ + if (dd->f_cleanup) + dd->f_cleanup(dd); + + qib_pcie_ddcleanup(dd); + + cleanup_device_data(dd); + + qib_free_devdata(dd); +} + +static int __devinit qib_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int ret, j, pidx, initfail; + struct qib_devdata *dd = NULL; + + ret = qib_pcie_init(pdev, ent); + if (ret) + goto bail; + + /* + * Do device-specific initialiation, function table setup, dd + * allocation, etc. + */ + switch (ent->device) { + case PCI_DEVICE_ID_QLOGIC_IB_6120: +#ifdef CONFIG_PCI_MSI + dd = qib_init_iba6120_funcs(pdev, ent); +#else + qib_early_err(&pdev->dev, "QLogic PCIE device 0x%x cannot " + "work if CONFIG_PCI_MSI is not enabled\n", + ent->device); + dd = ERR_PTR(-ENODEV); +#endif + break; + + case PCI_DEVICE_ID_QLOGIC_IB_7220: + dd = qib_init_iba7220_funcs(pdev, ent); + break; + + case PCI_DEVICE_ID_QLOGIC_IB_7322: + dd = qib_init_iba7322_funcs(pdev, ent); + break; + + default: + qib_early_err(&pdev->dev, "Failing on unknown QLogic " + "deviceid 0x%x\n", ent->device); + ret = -ENODEV; + } + + if (IS_ERR(dd)) + ret = PTR_ERR(dd); + if (ret) + goto bail; /* error already printed */ + + /* do the generic initialization */ + initfail = qib_init(dd, 0); + + ret = qib_register_ib_device(dd); + + /* + * Now ready for use. this should be cleared whenever we + * detect a reset, or initiate one. If earlier failure, + * we still create devices, so diags, etc. can be used + * to determine cause of problem. + */ + if (!qib_mini_init && !initfail && !ret) + dd->flags |= QIB_INITTED; + + j = qib_device_create(dd); + if (j) + qib_dev_err(dd, "Failed to create /dev devices: %d\n", -j); + j = qibfs_add(dd); + if (j) + qib_dev_err(dd, "Failed filesystem setup for counters: %d\n", + -j); + + if (qib_mini_init || initfail || ret) { + qib_stop_timers(dd); + flush_workqueue(ib_wq); + for (pidx = 0; pidx < dd->num_pports; ++pidx) + dd->f_quiet_serdes(dd->pport + pidx); + if (qib_mini_init) + goto bail; + if (!j) { + (void) qibfs_remove(dd); + qib_device_remove(dd); + } + if (!ret) + qib_unregister_ib_device(dd); + qib_postinit_cleanup(dd); + if (initfail) + ret = initfail; + goto bail; + } + + if (!qib_wc_pat) { + ret = qib_enable_wc(dd); + if (ret) { + qib_dev_err(dd, "Write combining not enabled " + "(err %d): performance may be poor\n", + -ret); + ret = 0; + } + } + + qib_verify_pioperf(dd); +bail: + return ret; +} + +static void __devexit qib_remove_one(struct pci_dev *pdev) +{ + struct qib_devdata *dd = pci_get_drvdata(pdev); + int ret; + + /* unregister from IB core */ + qib_unregister_ib_device(dd); + + /* + * Disable the IB link, disable interrupts on the device, + * clear dma engines, etc. + */ + if (!qib_mini_init) + qib_shutdown_device(dd); + + qib_stop_timers(dd); + + /* wait until all of our (qsfp) queue_work() calls complete */ + flush_workqueue(ib_wq); + + ret = qibfs_remove(dd); + if (ret) + qib_dev_err(dd, "Failed counters filesystem cleanup: %d\n", + -ret); + + qib_device_remove(dd); + + qib_postinit_cleanup(dd); +} + +/** + * qib_create_rcvhdrq - create a receive header queue + * @dd: the qlogic_ib device + * @rcd: the context data + * + * This must be contiguous memory (from an i/o perspective), and must be + * DMA'able (which means for some systems, it will go through an IOMMU, + * or be forced into a low address range). + */ +int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) +{ + unsigned amt; + + if (!rcd->rcvhdrq) { + dma_addr_t phys_hdrqtail; + gfp_t gfp_flags; + + amt = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize * + sizeof(u32), PAGE_SIZE); + gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? + GFP_USER : GFP_KERNEL; + rcd->rcvhdrq = dma_alloc_coherent( + &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys, + gfp_flags | __GFP_COMP); + + if (!rcd->rcvhdrq) { + qib_dev_err(dd, "attempt to allocate %d bytes " + "for ctxt %u rcvhdrq failed\n", + amt, rcd->ctxt); + goto bail; + } + + if (rcd->ctxt >= dd->first_user_ctxt) { + rcd->user_event_mask = vmalloc_user(PAGE_SIZE); + if (!rcd->user_event_mask) + goto bail_free_hdrq; + } + + if (!(dd->flags & QIB_NODMA_RTAIL)) { + rcd->rcvhdrtail_kvaddr = dma_alloc_coherent( + &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, + gfp_flags); + if (!rcd->rcvhdrtail_kvaddr) + goto bail_free; + rcd->rcvhdrqtailaddr_phys = phys_hdrqtail; + } + + rcd->rcvhdrq_size = amt; + } + + /* clear for security and sanity on each use */ + memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size); + if (rcd->rcvhdrtail_kvaddr) + memset(rcd->rcvhdrtail_kvaddr, 0, PAGE_SIZE); + return 0; + +bail_free: + qib_dev_err(dd, "attempt to allocate 1 page for ctxt %u " + "rcvhdrqtailaddr failed\n", rcd->ctxt); + vfree(rcd->user_event_mask); + rcd->user_event_mask = NULL; +bail_free_hdrq: + dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq, + rcd->rcvhdrq_phys); + rcd->rcvhdrq = NULL; +bail: + return -ENOMEM; +} + +/** + * allocate eager buffers, both kernel and user contexts. + * @rcd: the context we are setting up. + * + * Allocate the eager TID buffers and program them into hip. + * They are no longer completely contiguous, we do multiple allocation + * calls. Otherwise we get the OOM code involved, by asking for too + * much per call, with disastrous results on some kernels. + */ +int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) +{ + struct qib_devdata *dd = rcd->dd; + unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; + size_t size; + gfp_t gfp_flags; + + /* + * GFP_USER, but without GFP_FS, so buffer cache can be + * coalesced (we hope); otherwise, even at order 4, + * heavy filesystem activity makes these fail, and we can + * use compound pages. + */ + gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; + + egrcnt = rcd->rcvegrcnt; + egroff = rcd->rcvegr_tid_base; + egrsize = dd->rcvegrbufsize; + + chunk = rcd->rcvegrbuf_chunks; + egrperchunk = rcd->rcvegrbufs_perchunk; + size = rcd->rcvegrbuf_size; + if (!rcd->rcvegrbuf) { + rcd->rcvegrbuf = + kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]), + GFP_KERNEL); + if (!rcd->rcvegrbuf) + goto bail; + } + if (!rcd->rcvegrbuf_phys) { + rcd->rcvegrbuf_phys = + kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]), + GFP_KERNEL); + if (!rcd->rcvegrbuf_phys) + goto bail_rcvegrbuf; + } + for (e = 0; e < rcd->rcvegrbuf_chunks; e++) { + if (rcd->rcvegrbuf[e]) + continue; + rcd->rcvegrbuf[e] = + dma_alloc_coherent(&dd->pcidev->dev, size, + &rcd->rcvegrbuf_phys[e], + gfp_flags); + if (!rcd->rcvegrbuf[e]) + goto bail_rcvegrbuf_phys; + } + + rcd->rcvegr_phys = rcd->rcvegrbuf_phys[0]; + + for (e = chunk = 0; chunk < rcd->rcvegrbuf_chunks; chunk++) { + dma_addr_t pa = rcd->rcvegrbuf_phys[chunk]; + unsigned i; + + /* clear for security and sanity on each use */ + memset(rcd->rcvegrbuf[chunk], 0, size); + + for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) { + dd->f_put_tid(dd, e + egroff + + (u64 __iomem *) + ((char __iomem *) + dd->kregbase + + dd->rcvegrbase), + RCVHQ_RCV_TYPE_EAGER, pa); + pa += egrsize; + } + cond_resched(); /* don't hog the cpu */ + } + + return 0; + +bail_rcvegrbuf_phys: + for (e = 0; e < rcd->rcvegrbuf_chunks && rcd->rcvegrbuf[e]; e++) + dma_free_coherent(&dd->pcidev->dev, size, + rcd->rcvegrbuf[e], rcd->rcvegrbuf_phys[e]); + kfree(rcd->rcvegrbuf_phys); + rcd->rcvegrbuf_phys = NULL; +bail_rcvegrbuf: + kfree(rcd->rcvegrbuf); + rcd->rcvegrbuf = NULL; +bail: + return -ENOMEM; +} + +/* + * Note: Changes to this routine should be mirrored + * for the diagnostics routine qib_remap_ioaddr32(). + * There is also related code for VL15 buffers in qib_init_7322_variables(). + * The teardown code that unmaps is in qib_pcie_ddcleanup() + */ +int init_chip_wc_pat(struct qib_devdata *dd, u32 vl15buflen) +{ + u64 __iomem *qib_kregbase = NULL; + void __iomem *qib_piobase = NULL; + u64 __iomem *qib_userbase = NULL; + u64 qib_kreglen; + u64 qib_pio2koffset = dd->piobufbase & 0xffffffff; + u64 qib_pio4koffset = dd->piobufbase >> 32; + u64 qib_pio2klen = dd->piobcnt2k * dd->palign; + u64 qib_pio4klen = dd->piobcnt4k * dd->align4k; + u64 qib_physaddr = dd->physaddr; + u64 qib_piolen; + u64 qib_userlen = 0; + + /* + * Free the old mapping because the kernel will try to reuse the + * old mapping and not create a new mapping with the + * write combining attribute. + */ + iounmap(dd->kregbase); + dd->kregbase = NULL; + + /* + * Assumes chip address space looks like: + * - kregs + sregs + cregs + uregs (in any order) + * - piobufs (2K and 4K bufs in either order) + * or: + * - kregs + sregs + cregs (in any order) + * - piobufs (2K and 4K bufs in either order) + * - uregs + */ + if (dd->piobcnt4k == 0) { + qib_kreglen = qib_pio2koffset; + qib_piolen = qib_pio2klen; + } else if (qib_pio2koffset < qib_pio4koffset) { + qib_kreglen = qib_pio2koffset; + qib_piolen = qib_pio4koffset + qib_pio4klen - qib_kreglen; + } else { + qib_kreglen = qib_pio4koffset; + qib_piolen = qib_pio2koffset + qib_pio2klen - qib_kreglen; + } + qib_piolen += vl15buflen; + /* Map just the configured ports (not all hw ports) */ + if (dd->uregbase > qib_kreglen) + qib_userlen = dd->ureg_align * dd->cfgctxts; + + /* Sanity checks passed, now create the new mappings */ + qib_kregbase = ioremap_nocache(qib_physaddr, qib_kreglen); + if (!qib_kregbase) + goto bail; + + qib_piobase = ioremap_wc(qib_physaddr + qib_kreglen, qib_piolen); + if (!qib_piobase) + goto bail_kregbase; + + if (qib_userlen) { + qib_userbase = ioremap_nocache(qib_physaddr + dd->uregbase, + qib_userlen); + if (!qib_userbase) + goto bail_piobase; + } + + dd->kregbase = qib_kregbase; + dd->kregend = (u64 __iomem *) + ((char __iomem *) qib_kregbase + qib_kreglen); + dd->piobase = qib_piobase; + dd->pio2kbase = (void __iomem *) + (((char __iomem *) dd->piobase) + + qib_pio2koffset - qib_kreglen); + if (dd->piobcnt4k) + dd->pio4kbase = (void __iomem *) + (((char __iomem *) dd->piobase) + + qib_pio4koffset - qib_kreglen); + if (qib_userlen) + /* ureg will now be accessed relative to dd->userbase */ + dd->userbase = qib_userbase; + return 0; + +bail_piobase: + iounmap(qib_piobase); +bail_kregbase: + iounmap(qib_kregbase); +bail: + return -ENOMEM; +} diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c new file mode 100644 index 00000000..6ae57d23 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_intr.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "qib.h" +#include "qib_common.h" + +/** + * qib_format_hwmsg - format a single hwerror message + * @msg message buffer + * @msgl length of message buffer + * @hwmsg message to add to message buffer + */ +static void qib_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) +{ + strlcat(msg, "[", msgl); + strlcat(msg, hwmsg, msgl); + strlcat(msg, "]", msgl); +} + +/** + * qib_format_hwerrors - format hardware error messages for display + * @hwerrs hardware errors bit vector + * @hwerrmsgs hardware error descriptions + * @nhwerrmsgs number of hwerrmsgs + * @msg message buffer + * @msgl message buffer length + */ +void qib_format_hwerrors(u64 hwerrs, const struct qib_hwerror_msgs *hwerrmsgs, + size_t nhwerrmsgs, char *msg, size_t msgl) +{ + int i; + + for (i = 0; i < nhwerrmsgs; i++) + if (hwerrs & hwerrmsgs[i].mask) + qib_format_hwmsg(msg, msgl, hwerrmsgs[i].msg); +} + +static void signal_ib_event(struct qib_pportdata *ppd, enum ib_event_type ev) +{ + struct ib_event event; + struct qib_devdata *dd = ppd->dd; + + event.device = &dd->verbs_dev.ibdev; + event.element.port_num = ppd->port; + event.event = ev; + ib_dispatch_event(&event); +} + +void qib_handle_e_ibstatuschanged(struct qib_pportdata *ppd, u64 ibcs) +{ + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + u32 lstate; + u8 ltstate; + enum ib_event_type ev = 0; + + lstate = dd->f_iblink_state(ibcs); /* linkstate */ + ltstate = dd->f_ibphys_portstate(ibcs); + + /* + * If linkstate transitions into INIT from any of the various down + * states, or if it transitions from any of the up (INIT or better) + * states into any of the down states (except link recovery), then + * call the chip-specific code to take appropriate actions. + * + * ppd->lflags could be 0 if this is the first time the interrupt + * handlers has been called but the link is already up. + */ + if (lstate >= IB_PORT_INIT && + (!ppd->lflags || (ppd->lflags & QIBL_LINKDOWN)) && + ltstate == IB_PHYSPORTSTATE_LINKUP) { + /* transitioned to UP */ + if (dd->f_ib_updown(ppd, 1, ibcs)) + goto skip_ibchange; /* chip-code handled */ + } else if (ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED | + QIBL_LINKACTIVE | QIBL_IB_FORCE_NOTIFY)) { + if (ltstate != IB_PHYSPORTSTATE_LINKUP && + ltstate <= IB_PHYSPORTSTATE_CFG_TRAIN && + dd->f_ib_updown(ppd, 0, ibcs)) + goto skip_ibchange; /* chip-code handled */ + qib_set_uevent_bits(ppd, _QIB_EVENT_LINKDOWN_BIT); + } + + if (lstate != IB_PORT_DOWN) { + /* lstate is INIT, ARMED, or ACTIVE */ + if (lstate != IB_PORT_ACTIVE) { + *ppd->statusp &= ~QIB_STATUS_IB_READY; + if (ppd->lflags & QIBL_LINKACTIVE) + ev = IB_EVENT_PORT_ERR; + spin_lock_irqsave(&ppd->lflags_lock, flags); + if (lstate == IB_PORT_ARMED) { + ppd->lflags |= QIBL_LINKARMED | QIBL_LINKV; + ppd->lflags &= ~(QIBL_LINKINIT | + QIBL_LINKDOWN | QIBL_LINKACTIVE); + } else { + ppd->lflags |= QIBL_LINKINIT | QIBL_LINKV; + ppd->lflags &= ~(QIBL_LINKARMED | + QIBL_LINKDOWN | QIBL_LINKACTIVE); + } + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + /* start a 75msec timer to clear symbol errors */ + mod_timer(&ppd->symerr_clear_timer, + msecs_to_jiffies(75)); + } else if (ltstate == IB_PHYSPORTSTATE_LINKUP && + !(ppd->lflags & QIBL_LINKACTIVE)) { + /* active, but not active defered */ + qib_hol_up(ppd); /* useful only for 6120 now */ + *ppd->statusp |= + QIB_STATUS_IB_READY | QIB_STATUS_IB_CONF; + qib_clear_symerror_on_linkup((unsigned long)ppd); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_LINKACTIVE | QIBL_LINKV; + ppd->lflags &= ~(QIBL_LINKINIT | + QIBL_LINKDOWN | QIBL_LINKARMED); + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + if (dd->flags & QIB_HAS_SEND_DMA) + qib_sdma_process_event(ppd, + qib_sdma_event_e30_go_running); + ev = IB_EVENT_PORT_ACTIVE; + dd->f_setextled(ppd, 1); + } + } else { /* down */ + if (ppd->lflags & QIBL_LINKACTIVE) + ev = IB_EVENT_PORT_ERR; + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_LINKDOWN | QIBL_LINKV; + ppd->lflags &= ~(QIBL_LINKINIT | + QIBL_LINKACTIVE | QIBL_LINKARMED); + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + *ppd->statusp &= ~QIB_STATUS_IB_READY; + } + +skip_ibchange: + ppd->lastibcstat = ibcs; + if (ev) + signal_ib_event(ppd, ev); + return; +} + +void qib_clear_symerror_on_linkup(unsigned long opaque) +{ + struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + + if (ppd->lflags & QIBL_LINKACTIVE) + return; + + ppd->ibport_data.z_symbol_error_counter = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR); +} + +/* + * Handle receive interrupts for user ctxts; this means a user + * process was waiting for a packet to arrive, and didn't want + * to poll. + */ +void qib_handle_urcv(struct qib_devdata *dd, u64 ctxtr) +{ + struct qib_ctxtdata *rcd; + unsigned long flags; + int i; + + spin_lock_irqsave(&dd->uctxt_lock, flags); + for (i = dd->first_user_ctxt; dd->rcd && i < dd->cfgctxts; i++) { + if (!(ctxtr & (1ULL << i))) + continue; + rcd = dd->rcd[i]; + if (!rcd || !rcd->cnt) + continue; + + if (test_and_clear_bit(QIB_CTXT_WAITING_RCV, &rcd->flag)) { + wake_up_interruptible(&rcd->wait); + dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_INTRAVAIL_DIS, + rcd->ctxt); + } else if (test_and_clear_bit(QIB_CTXT_WAITING_URG, + &rcd->flag)) { + rcd->urgent++; + wake_up_interruptible(&rcd->wait); + } + } + spin_unlock_irqrestore(&dd->uctxt_lock, flags); +} + +void qib_bad_intrstatus(struct qib_devdata *dd) +{ + static int allbits; + + /* separate routine, for better optimization of qib_intr() */ + + /* + * We print the message and disable interrupts, in hope of + * having a better chance of debugging the problem. + */ + qib_dev_err(dd, "Read of chip interrupt status failed" + " disabling interrupts\n"); + if (allbits++) { + /* disable interrupt delivery, something is very wrong */ + if (allbits == 2) + dd->f_set_intr_state(dd, 0); + if (allbits == 3) { + qib_dev_err(dd, "2nd bad interrupt status, " + "unregistering interrupts\n"); + dd->flags |= QIB_BADINTR; + dd->flags &= ~QIB_INITTED; + dd->f_free_irq(dd); + } + } +} diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c new file mode 100644 index 00000000..8fd19a47 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -0,0 +1,360 @@ +/* + * Copyright (c) 2006, 2007, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "qib.h" + +/** + * qib_alloc_lkey - allocate an lkey + * @rkt: lkey table in which to allocate the lkey + * @mr: memory region that this lkey protects + * + * Returns 1 if successful, otherwise returns 0. + */ + +int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) +{ + unsigned long flags; + u32 r; + u32 n; + int ret; + + spin_lock_irqsave(&rkt->lock, flags); + + /* Find the next available LKEY */ + r = rkt->next; + n = r; + for (;;) { + if (rkt->table[r] == NULL) + break; + r = (r + 1) & (rkt->max - 1); + if (r == n) { + spin_unlock_irqrestore(&rkt->lock, flags); + ret = 0; + goto bail; + } + } + rkt->next = (r + 1) & (rkt->max - 1); + /* + * Make sure lkey is never zero which is reserved to indicate an + * unrestricted LKEY. + */ + rkt->gen++; + mr->lkey = (r << (32 - ib_qib_lkey_table_size)) | + ((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen) + << 8); + if (mr->lkey == 0) { + mr->lkey |= 1 << 8; + rkt->gen++; + } + rkt->table[r] = mr; + spin_unlock_irqrestore(&rkt->lock, flags); + + ret = 1; + +bail: + return ret; +} + +/** + * qib_free_lkey - free an lkey + * @rkt: table from which to free the lkey + * @lkey: lkey id to free + */ +int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr) +{ + unsigned long flags; + u32 lkey = mr->lkey; + u32 r; + int ret; + + spin_lock_irqsave(&dev->lk_table.lock, flags); + if (lkey == 0) { + if (dev->dma_mr && dev->dma_mr == mr) { + ret = atomic_read(&dev->dma_mr->refcount); + if (!ret) + dev->dma_mr = NULL; + } else + ret = 0; + } else { + r = lkey >> (32 - ib_qib_lkey_table_size); + ret = atomic_read(&dev->lk_table.table[r]->refcount); + if (!ret) + dev->lk_table.table[r] = NULL; + } + spin_unlock_irqrestore(&dev->lk_table.lock, flags); + + if (ret) + ret = -EBUSY; + return ret; +} + +/** + * qib_lkey_ok - check IB SGE for validity and initialize + * @rkt: table containing lkey to check SGE against + * @isge: outgoing internal SGE + * @sge: SGE to check + * @acc: access flags + * + * Return 1 if valid and successful, otherwise returns 0. + * + * Check the IB SGE for validity and initialize our internal version + * of it. + */ +int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, + struct qib_sge *isge, struct ib_sge *sge, int acc) +{ + struct qib_mregion *mr; + unsigned n, m; + size_t off; + unsigned long flags; + + /* + * We use LKEY == zero for kernel virtual addresses + * (see qib_get_dma_mr and qib_dma.c). + */ + spin_lock_irqsave(&rkt->lock, flags); + if (sge->lkey == 0) { + struct qib_ibdev *dev = to_idev(pd->ibpd.device); + + if (pd->user) + goto bail; + if (!dev->dma_mr) + goto bail; + atomic_inc(&dev->dma_mr->refcount); + spin_unlock_irqrestore(&rkt->lock, flags); + + isge->mr = dev->dma_mr; + isge->vaddr = (void *) sge->addr; + isge->length = sge->length; + isge->sge_length = sge->length; + isge->m = 0; + isge->n = 0; + goto ok; + } + mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]; + if (unlikely(mr == NULL || mr->lkey != sge->lkey || + mr->pd != &pd->ibpd)) + goto bail; + + off = sge->addr - mr->user_base; + if (unlikely(sge->addr < mr->user_base || + off + sge->length > mr->length || + (mr->access_flags & acc) != acc)) + goto bail; + atomic_inc(&mr->refcount); + spin_unlock_irqrestore(&rkt->lock, flags); + + off += mr->offset; + if (mr->page_shift) { + /* + page sizes are uniform power of 2 so no loop is necessary + entries_spanned_by_off is the number of times the loop below + would have executed. + */ + size_t entries_spanned_by_off; + + entries_spanned_by_off = off >> mr->page_shift; + off -= (entries_spanned_by_off << mr->page_shift); + m = entries_spanned_by_off/QIB_SEGSZ; + n = entries_spanned_by_off%QIB_SEGSZ; + } else { + m = 0; + n = 0; + while (off >= mr->map[m]->segs[n].length) { + off -= mr->map[m]->segs[n].length; + n++; + if (n >= QIB_SEGSZ) { + m++; + n = 0; + } + } + } + isge->mr = mr; + isge->vaddr = mr->map[m]->segs[n].vaddr + off; + isge->length = mr->map[m]->segs[n].length - off; + isge->sge_length = sge->length; + isge->m = m; + isge->n = n; +ok: + return 1; +bail: + spin_unlock_irqrestore(&rkt->lock, flags); + return 0; +} + +/** + * qib_rkey_ok - check the IB virtual address, length, and RKEY + * @dev: infiniband device + * @ss: SGE state + * @len: length of data + * @vaddr: virtual address to place data + * @rkey: rkey to check + * @acc: access flags + * + * Return 1 if successful, otherwise 0. + */ +int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, + u32 len, u64 vaddr, u32 rkey, int acc) +{ + struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; + struct qib_mregion *mr; + unsigned n, m; + size_t off; + unsigned long flags; + + /* + * We use RKEY == zero for kernel virtual addresses + * (see qib_get_dma_mr and qib_dma.c). + */ + spin_lock_irqsave(&rkt->lock, flags); + if (rkey == 0) { + struct qib_pd *pd = to_ipd(qp->ibqp.pd); + struct qib_ibdev *dev = to_idev(pd->ibpd.device); + + if (pd->user) + goto bail; + if (!dev->dma_mr) + goto bail; + atomic_inc(&dev->dma_mr->refcount); + spin_unlock_irqrestore(&rkt->lock, flags); + + sge->mr = dev->dma_mr; + sge->vaddr = (void *) vaddr; + sge->length = len; + sge->sge_length = len; + sge->m = 0; + sge->n = 0; + goto ok; + } + + mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; + if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) + goto bail; + + off = vaddr - mr->iova; + if (unlikely(vaddr < mr->iova || off + len > mr->length || + (mr->access_flags & acc) == 0)) + goto bail; + atomic_inc(&mr->refcount); + spin_unlock_irqrestore(&rkt->lock, flags); + + off += mr->offset; + if (mr->page_shift) { + /* + page sizes are uniform power of 2 so no loop is necessary + entries_spanned_by_off is the number of times the loop below + would have executed. + */ + size_t entries_spanned_by_off; + + entries_spanned_by_off = off >> mr->page_shift; + off -= (entries_spanned_by_off << mr->page_shift); + m = entries_spanned_by_off/QIB_SEGSZ; + n = entries_spanned_by_off%QIB_SEGSZ; + } else { + m = 0; + n = 0; + while (off >= mr->map[m]->segs[n].length) { + off -= mr->map[m]->segs[n].length; + n++; + if (n >= QIB_SEGSZ) { + m++; + n = 0; + } + } + } + sge->mr = mr; + sge->vaddr = mr->map[m]->segs[n].vaddr + off; + sge->length = mr->map[m]->segs[n].length - off; + sge->sge_length = len; + sge->m = m; + sge->n = n; +ok: + return 1; +bail: + spin_unlock_irqrestore(&rkt->lock, flags); + return 0; +} + +/* + * Initialize the memory region specified by the work reqeust. + */ +int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr) +{ + struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; + struct qib_pd *pd = to_ipd(qp->ibqp.pd); + struct qib_mregion *mr; + u32 rkey = wr->wr.fast_reg.rkey; + unsigned i, n, m; + int ret = -EINVAL; + unsigned long flags; + u64 *page_list; + size_t ps; + + spin_lock_irqsave(&rkt->lock, flags); + if (pd->user || rkey == 0) + goto bail; + + mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; + if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) + goto bail; + + if (wr->wr.fast_reg.page_list_len > mr->max_segs) + goto bail; + + ps = 1UL << wr->wr.fast_reg.page_shift; + if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len) + goto bail; + + mr->user_base = wr->wr.fast_reg.iova_start; + mr->iova = wr->wr.fast_reg.iova_start; + mr->lkey = rkey; + mr->length = wr->wr.fast_reg.length; + mr->access_flags = wr->wr.fast_reg.access_flags; + page_list = wr->wr.fast_reg.page_list->page_list; + m = 0; + n = 0; + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + mr->map[m]->segs[n].vaddr = (void *) page_list[i]; + mr->map[m]->segs[n].length = ps; + if (++n == QIB_SEGSZ) { + m++; + n = 0; + } + } + + ret = 0; +bail: + spin_unlock_irqrestore(&rkt->lock, flags); + return ret; +} diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c new file mode 100644 index 00000000..c4ff7888 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -0,0 +1,2178 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "qib.h" +#include "qib_mad.h" + +static int reply(struct ib_smp *smp) +{ + /* + * The verbs framework will handle the directed/LID route + * packet changes. + */ + smp->method = IB_MGMT_METHOD_GET_RESP; + if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + smp->status |= IB_SMP_DIRECTION; + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) +{ + struct ib_mad_send_buf *send_buf; + struct ib_mad_agent *agent; + struct ib_smp *smp; + int ret; + unsigned long flags; + unsigned long timeout; + + agent = ibp->send_agent; + if (!agent) + return; + + /* o14-3.2.1 */ + if (!(ppd_from_ibp(ibp)->lflags & QIBL_LINKACTIVE)) + return; + + /* o14-2 */ + if (ibp->trap_timeout && time_before(jiffies, ibp->trap_timeout)) + return; + + send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR, + IB_MGMT_MAD_DATA, GFP_ATOMIC); + if (IS_ERR(send_buf)) + return; + + smp = send_buf->mad; + smp->base_version = IB_MGMT_BASE_VERSION; + smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + smp->class_version = 1; + smp->method = IB_MGMT_METHOD_TRAP; + ibp->tid++; + smp->tid = cpu_to_be64(ibp->tid); + smp->attr_id = IB_SMP_ATTR_NOTICE; + /* o14-1: smp->mkey = 0; */ + memcpy(smp->data, data, len); + + spin_lock_irqsave(&ibp->lock, flags); + if (!ibp->sm_ah) { + if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) { + struct ib_ah *ah; + struct ib_ah_attr attr; + + memset(&attr, 0, sizeof attr); + attr.dlid = ibp->sm_lid; + attr.port_num = ppd_from_ibp(ibp)->port; + ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr); + if (IS_ERR(ah)) + ret = -EINVAL; + else { + send_buf->ah = ah; + ibp->sm_ah = to_iah(ah); + ret = 0; + } + } else + ret = -EINVAL; + } else { + send_buf->ah = &ibp->sm_ah->ibah; + ret = 0; + } + spin_unlock_irqrestore(&ibp->lock, flags); + + if (!ret) + ret = ib_post_send_mad(send_buf, NULL); + if (!ret) { + /* 4.096 usec. */ + timeout = (4096 * (1UL << ibp->subnet_timeout)) / 1000; + ibp->trap_timeout = jiffies + usecs_to_jiffies(timeout); + } else { + ib_free_send_mad(send_buf); + ibp->trap_timeout = 0; + } +} + +/* + * Send a bad [PQ]_Key trap (ch. 14.3.8). + */ +void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl, + u32 qp1, u32 qp2, __be16 lid1, __be16 lid2) +{ + struct ib_mad_notice_attr data; + + if (trap_num == IB_NOTICE_TRAP_BAD_PKEY) + ibp->pkey_violations++; + else + ibp->qkey_violations++; + ibp->n_pkt_drops++; + + /* Send violation trap */ + data.generic_type = IB_NOTICE_TYPE_SECURITY; + data.prod_type_msb = 0; + data.prod_type_lsb = IB_NOTICE_PROD_CA; + data.trap_num = trap_num; + data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid); + data.toggle_count = 0; + memset(&data.details, 0, sizeof data.details); + data.details.ntc_257_258.lid1 = lid1; + data.details.ntc_257_258.lid2 = lid2; + data.details.ntc_257_258.key = cpu_to_be32(key); + data.details.ntc_257_258.sl_qp1 = cpu_to_be32((sl << 28) | qp1); + data.details.ntc_257_258.qp2 = cpu_to_be32(qp2); + + qib_send_trap(ibp, &data, sizeof data); +} + +/* + * Send a bad M_Key trap (ch. 14.3.9). + */ +static void qib_bad_mkey(struct qib_ibport *ibp, struct ib_smp *smp) +{ + struct ib_mad_notice_attr data; + + /* Send violation trap */ + data.generic_type = IB_NOTICE_TYPE_SECURITY; + data.prod_type_msb = 0; + data.prod_type_lsb = IB_NOTICE_PROD_CA; + data.trap_num = IB_NOTICE_TRAP_BAD_MKEY; + data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid); + data.toggle_count = 0; + memset(&data.details, 0, sizeof data.details); + data.details.ntc_256.lid = data.issuer_lid; + data.details.ntc_256.method = smp->method; + data.details.ntc_256.attr_id = smp->attr_id; + data.details.ntc_256.attr_mod = smp->attr_mod; + data.details.ntc_256.mkey = smp->mkey; + if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + u8 hop_cnt; + + data.details.ntc_256.dr_slid = smp->dr_slid; + data.details.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE; + hop_cnt = smp->hop_cnt; + if (hop_cnt > ARRAY_SIZE(data.details.ntc_256.dr_rtn_path)) { + data.details.ntc_256.dr_trunc_hop |= + IB_NOTICE_TRAP_DR_TRUNC; + hop_cnt = ARRAY_SIZE(data.details.ntc_256.dr_rtn_path); + } + data.details.ntc_256.dr_trunc_hop |= hop_cnt; + memcpy(data.details.ntc_256.dr_rtn_path, smp->return_path, + hop_cnt); + } + + qib_send_trap(ibp, &data, sizeof data); +} + +/* + * Send a Port Capability Mask Changed trap (ch. 14.3.11). + */ +void qib_cap_mask_chg(struct qib_ibport *ibp) +{ + struct ib_mad_notice_attr data; + + data.generic_type = IB_NOTICE_TYPE_INFO; + data.prod_type_msb = 0; + data.prod_type_lsb = IB_NOTICE_PROD_CA; + data.trap_num = IB_NOTICE_TRAP_CAP_MASK_CHG; + data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid); + data.toggle_count = 0; + memset(&data.details, 0, sizeof data.details); + data.details.ntc_144.lid = data.issuer_lid; + data.details.ntc_144.new_cap_mask = cpu_to_be32(ibp->port_cap_flags); + + qib_send_trap(ibp, &data, sizeof data); +} + +/* + * Send a System Image GUID Changed trap (ch. 14.3.12). + */ +void qib_sys_guid_chg(struct qib_ibport *ibp) +{ + struct ib_mad_notice_attr data; + + data.generic_type = IB_NOTICE_TYPE_INFO; + data.prod_type_msb = 0; + data.prod_type_lsb = IB_NOTICE_PROD_CA; + data.trap_num = IB_NOTICE_TRAP_SYS_GUID_CHG; + data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid); + data.toggle_count = 0; + memset(&data.details, 0, sizeof data.details); + data.details.ntc_145.lid = data.issuer_lid; + data.details.ntc_145.new_sys_guid = ib_qib_sys_image_guid; + + qib_send_trap(ibp, &data, sizeof data); +} + +/* + * Send a Node Description Changed trap (ch. 14.3.13). + */ +void qib_node_desc_chg(struct qib_ibport *ibp) +{ + struct ib_mad_notice_attr data; + + data.generic_type = IB_NOTICE_TYPE_INFO; + data.prod_type_msb = 0; + data.prod_type_lsb = IB_NOTICE_PROD_CA; + data.trap_num = IB_NOTICE_TRAP_CAP_MASK_CHG; + data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid); + data.toggle_count = 0; + memset(&data.details, 0, sizeof data.details); + data.details.ntc_144.lid = data.issuer_lid; + data.details.ntc_144.local_changes = 1; + data.details.ntc_144.change_flags = IB_NOTICE_TRAP_NODE_DESC_CHG; + + qib_send_trap(ibp, &data, sizeof data); +} + +static int subn_get_nodedescription(struct ib_smp *smp, + struct ib_device *ibdev) +{ + if (smp->attr_mod) + smp->status |= IB_SMP_INVALID_FIELD; + + memcpy(smp->data, ibdev->node_desc, sizeof(smp->data)); + + return reply(smp); +} + +static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + struct ib_node_info *nip = (struct ib_node_info *)&smp->data; + struct qib_devdata *dd = dd_from_ibdev(ibdev); + u32 vendor, majrev, minrev; + unsigned pidx = port - 1; /* IB number port from 1, hdw from 0 */ + + /* GUID 0 is illegal */ + if (smp->attr_mod || pidx >= dd->num_pports || + dd->pport[pidx].guid == 0) + smp->status |= IB_SMP_INVALID_FIELD; + else + nip->port_guid = dd->pport[pidx].guid; + + nip->base_version = 1; + nip->class_version = 1; + nip->node_type = 1; /* channel adapter */ + nip->num_ports = ibdev->phys_port_cnt; + /* This is already in network order */ + nip->sys_guid = ib_qib_sys_image_guid; + nip->node_guid = dd->pport->guid; /* Use first-port GUID as node */ + nip->partition_cap = cpu_to_be16(qib_get_npkeys(dd)); + nip->device_id = cpu_to_be16(dd->deviceid); + majrev = dd->majrev; + minrev = dd->minrev; + nip->revision = cpu_to_be32((majrev << 16) | minrev); + nip->local_port_num = port; + vendor = dd->vendorid; + nip->vendor_id[0] = QIB_SRC_OUI_1; + nip->vendor_id[1] = QIB_SRC_OUI_2; + nip->vendor_id[2] = QIB_SRC_OUI_3; + + return reply(smp); +} + +static int subn_get_guidinfo(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + struct qib_devdata *dd = dd_from_ibdev(ibdev); + u32 startgx = 8 * be32_to_cpu(smp->attr_mod); + __be64 *p = (__be64 *) smp->data; + unsigned pidx = port - 1; /* IB number port from 1, hdw from 0 */ + + /* 32 blocks of 8 64-bit GUIDs per block */ + + memset(smp->data, 0, sizeof(smp->data)); + + if (startgx == 0 && pidx < dd->num_pports) { + struct qib_pportdata *ppd = dd->pport + pidx; + struct qib_ibport *ibp = &ppd->ibport_data; + __be64 g = ppd->guid; + unsigned i; + + /* GUID 0 is illegal */ + if (g == 0) + smp->status |= IB_SMP_INVALID_FIELD; + else { + /* The first is a copy of the read-only HW GUID. */ + p[0] = g; + for (i = 1; i < QIB_GUIDS_PER_PORT; i++) + p[i] = ibp->guids[i - 1]; + } + } else + smp->status |= IB_SMP_INVALID_FIELD; + + return reply(smp); +} + +static void set_link_width_enabled(struct qib_pportdata *ppd, u32 w) +{ + (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LWID_ENB, w); +} + +static void set_link_speed_enabled(struct qib_pportdata *ppd, u32 s) +{ + (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_SPD_ENB, s); +} + +static int get_overrunthreshold(struct qib_pportdata *ppd) +{ + return ppd->dd->f_get_ib_cfg(ppd, QIB_IB_CFG_OVERRUN_THRESH); +} + +/** + * set_overrunthreshold - set the overrun threshold + * @ppd: the physical port data + * @n: the new threshold + * + * Note that this will only take effect when the link state changes. + */ +static int set_overrunthreshold(struct qib_pportdata *ppd, unsigned n) +{ + (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_OVERRUN_THRESH, + (u32)n); + return 0; +} + +static int get_phyerrthreshold(struct qib_pportdata *ppd) +{ + return ppd->dd->f_get_ib_cfg(ppd, QIB_IB_CFG_PHYERR_THRESH); +} + +/** + * set_phyerrthreshold - set the physical error threshold + * @ppd: the physical port data + * @n: the new threshold + * + * Note that this will only take effect when the link state changes. + */ +static int set_phyerrthreshold(struct qib_pportdata *ppd, unsigned n) +{ + (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PHYERR_THRESH, + (u32)n); + return 0; +} + +/** + * get_linkdowndefaultstate - get the default linkdown state + * @ppd: the physical port data + * + * Returns zero if the default is POLL, 1 if the default is SLEEP. + */ +static int get_linkdowndefaultstate(struct qib_pportdata *ppd) +{ + return ppd->dd->f_get_ib_cfg(ppd, QIB_IB_CFG_LINKDEFAULT) == + IB_LINKINITCMD_SLEEP; +} + +static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags) +{ + int ret = 0; + + /* Is the mkey in the process of expiring? */ + if (ibp->mkey_lease_timeout && + time_after_eq(jiffies, ibp->mkey_lease_timeout)) { + /* Clear timeout and mkey protection field. */ + ibp->mkey_lease_timeout = 0; + ibp->mkeyprot = 0; + } + + /* M_Key checking depends on Portinfo:M_Key_protect_bits */ + if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && ibp->mkey != 0 && + ibp->mkey != smp->mkey && + (smp->method == IB_MGMT_METHOD_SET || + smp->method == IB_MGMT_METHOD_TRAP_REPRESS || + (smp->method == IB_MGMT_METHOD_GET && ibp->mkeyprot >= 2))) { + if (ibp->mkey_violations != 0xFFFF) + ++ibp->mkey_violations; + if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period) + ibp->mkey_lease_timeout = jiffies + + ibp->mkey_lease_period * HZ; + /* Generate a trap notice. */ + qib_bad_mkey(ibp, smp); + ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + } else if (ibp->mkey_lease_timeout) + ibp->mkey_lease_timeout = 0; + + return ret; +} + +static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + struct qib_devdata *dd; + struct qib_pportdata *ppd; + struct qib_ibport *ibp; + struct ib_port_info *pip = (struct ib_port_info *)smp->data; + u8 mtu; + int ret; + u32 state; + u32 port_num = be32_to_cpu(smp->attr_mod); + + if (port_num == 0) + port_num = port; + else { + if (port_num > ibdev->phys_port_cnt) { + smp->status |= IB_SMP_INVALID_FIELD; + ret = reply(smp); + goto bail; + } + if (port_num != port) { + ibp = to_iport(ibdev, port_num); + ret = check_mkey(ibp, smp, 0); + if (ret) + goto bail; + } + } + + dd = dd_from_ibdev(ibdev); + /* IB numbers ports from 1, hdw from 0 */ + ppd = dd->pport + (port_num - 1); + ibp = &ppd->ibport_data; + + /* Clear all fields. Only set the non-zero fields. */ + memset(smp->data, 0, sizeof(smp->data)); + + /* Only return the mkey if the protection field allows it. */ + if (!(smp->method == IB_MGMT_METHOD_GET && + ibp->mkey != smp->mkey && + ibp->mkeyprot == 1)) + pip->mkey = ibp->mkey; + pip->gid_prefix = ibp->gid_prefix; + pip->lid = cpu_to_be16(ppd->lid); + pip->sm_lid = cpu_to_be16(ibp->sm_lid); + pip->cap_mask = cpu_to_be32(ibp->port_cap_flags); + /* pip->diag_code; */ + pip->mkey_lease_period = cpu_to_be16(ibp->mkey_lease_period); + pip->local_port_num = port; + pip->link_width_enabled = ppd->link_width_enabled; + pip->link_width_supported = ppd->link_width_supported; + pip->link_width_active = ppd->link_width_active; + state = dd->f_iblink_state(ppd->lastibcstat); + pip->linkspeed_portstate = ppd->link_speed_supported << 4 | state; + + pip->portphysstate_linkdown = + (dd->f_ibphys_portstate(ppd->lastibcstat) << 4) | + (get_linkdowndefaultstate(ppd) ? 1 : 2); + pip->mkeyprot_resv_lmc = (ibp->mkeyprot << 6) | ppd->lmc; + pip->linkspeedactive_enabled = (ppd->link_speed_active << 4) | + ppd->link_speed_enabled; + switch (ppd->ibmtu) { + default: /* something is wrong; fall through */ + case 4096: + mtu = IB_MTU_4096; + break; + case 2048: + mtu = IB_MTU_2048; + break; + case 1024: + mtu = IB_MTU_1024; + break; + case 512: + mtu = IB_MTU_512; + break; + case 256: + mtu = IB_MTU_256; + break; + } + pip->neighbormtu_mastersmsl = (mtu << 4) | ibp->sm_sl; + pip->vlcap_inittype = ppd->vls_supported << 4; /* InitType = 0 */ + pip->vl_high_limit = ibp->vl_high_limit; + pip->vl_arb_high_cap = + dd->f_get_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_CAP); + pip->vl_arb_low_cap = + dd->f_get_ib_cfg(ppd, QIB_IB_CFG_VL_LOW_CAP); + /* InitTypeReply = 0 */ + pip->inittypereply_mtucap = qib_ibmtu ? qib_ibmtu : IB_MTU_4096; + /* HCAs ignore VLStallCount and HOQLife */ + /* pip->vlstallcnt_hoqlife; */ + pip->operationalvl_pei_peo_fpi_fpo = + dd->f_get_ib_cfg(ppd, QIB_IB_CFG_OP_VLS) << 4; + pip->mkey_violations = cpu_to_be16(ibp->mkey_violations); + /* P_KeyViolations are counted by hardware. */ + pip->pkey_violations = cpu_to_be16(ibp->pkey_violations); + pip->qkey_violations = cpu_to_be16(ibp->qkey_violations); + /* Only the hardware GUID is supported for now */ + pip->guid_cap = QIB_GUIDS_PER_PORT; + pip->clientrereg_resv_subnetto = ibp->subnet_timeout; + /* 32.768 usec. response time (guessing) */ + pip->resv_resptimevalue = 3; + pip->localphyerrors_overrunerrors = + (get_phyerrthreshold(ppd) << 4) | + get_overrunthreshold(ppd); + /* pip->max_credit_hint; */ + if (ibp->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) { + u32 v; + + v = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_LINKLATENCY); + pip->link_roundtrip_latency[0] = v >> 16; + pip->link_roundtrip_latency[1] = v >> 8; + pip->link_roundtrip_latency[2] = v; + } + + ret = reply(smp); + +bail: + return ret; +} + +/** + * get_pkeys - return the PKEY table + * @dd: the qlogic_ib device + * @port: the IB port number + * @pkeys: the pkey table is placed here + */ +static int get_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys) +{ + struct qib_pportdata *ppd = dd->pport + port - 1; + /* + * always a kernel context, no locking needed. + * If we get here with ppd setup, no need to check + * that pd is valid. + */ + struct qib_ctxtdata *rcd = dd->rcd[ppd->hw_pidx]; + + memcpy(pkeys, rcd->pkeys, sizeof(rcd->pkeys)); + + return 0; +} + +static int subn_get_pkeytable(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff); + u16 *p = (u16 *) smp->data; + __be16 *q = (__be16 *) smp->data; + + /* 64 blocks of 32 16-bit P_Key entries */ + + memset(smp->data, 0, sizeof(smp->data)); + if (startpx == 0) { + struct qib_devdata *dd = dd_from_ibdev(ibdev); + unsigned i, n = qib_get_npkeys(dd); + + get_pkeys(dd, port, p); + + for (i = 0; i < n; i++) + q[i] = cpu_to_be16(p[i]); + } else + smp->status |= IB_SMP_INVALID_FIELD; + + return reply(smp); +} + +static int subn_set_guidinfo(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + struct qib_devdata *dd = dd_from_ibdev(ibdev); + u32 startgx = 8 * be32_to_cpu(smp->attr_mod); + __be64 *p = (__be64 *) smp->data; + unsigned pidx = port - 1; /* IB number port from 1, hdw from 0 */ + + /* 32 blocks of 8 64-bit GUIDs per block */ + + if (startgx == 0 && pidx < dd->num_pports) { + struct qib_pportdata *ppd = dd->pport + pidx; + struct qib_ibport *ibp = &ppd->ibport_data; + unsigned i; + + /* The first entry is read-only. */ + for (i = 1; i < QIB_GUIDS_PER_PORT; i++) + ibp->guids[i - 1] = p[i]; + } else + smp->status |= IB_SMP_INVALID_FIELD; + + /* The only GUID we support is the first read-only entry. */ + return subn_get_guidinfo(smp, ibdev, port); +} + +/** + * subn_set_portinfo - set port information + * @smp: the incoming SM packet + * @ibdev: the infiniband device + * @port: the port on the device + * + * Set Portinfo (see ch. 14.2.5.6). + */ +static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + struct ib_port_info *pip = (struct ib_port_info *)smp->data; + struct ib_event event; + struct qib_devdata *dd; + struct qib_pportdata *ppd; + struct qib_ibport *ibp; + char clientrereg = 0; + unsigned long flags; + u16 lid, smlid; + u8 lwe; + u8 lse; + u8 state; + u8 vls; + u8 msl; + u16 lstate; + int ret, ore, mtu; + u32 port_num = be32_to_cpu(smp->attr_mod); + + if (port_num == 0) + port_num = port; + else { + if (port_num > ibdev->phys_port_cnt) + goto err; + /* Port attributes can only be set on the receiving port */ + if (port_num != port) + goto get_only; + } + + dd = dd_from_ibdev(ibdev); + /* IB numbers ports from 1, hdw from 0 */ + ppd = dd->pport + (port_num - 1); + ibp = &ppd->ibport_data; + event.device = ibdev; + event.element.port_num = port; + + ibp->mkey = pip->mkey; + ibp->gid_prefix = pip->gid_prefix; + ibp->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period); + + lid = be16_to_cpu(pip->lid); + /* Must be a valid unicast LID address. */ + if (lid == 0 || lid >= QIB_MULTICAST_LID_BASE) + smp->status |= IB_SMP_INVALID_FIELD; + else if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) { + if (ppd->lid != lid) + qib_set_uevent_bits(ppd, _QIB_EVENT_LID_CHANGE_BIT); + if (ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) + qib_set_uevent_bits(ppd, _QIB_EVENT_LMC_CHANGE_BIT); + qib_set_lid(ppd, lid, pip->mkeyprot_resv_lmc & 7); + event.event = IB_EVENT_LID_CHANGE; + ib_dispatch_event(&event); + } + + smlid = be16_to_cpu(pip->sm_lid); + msl = pip->neighbormtu_mastersmsl & 0xF; + /* Must be a valid unicast LID address. */ + if (smlid == 0 || smlid >= QIB_MULTICAST_LID_BASE) + smp->status |= IB_SMP_INVALID_FIELD; + else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) { + spin_lock_irqsave(&ibp->lock, flags); + if (ibp->sm_ah) { + if (smlid != ibp->sm_lid) + ibp->sm_ah->attr.dlid = smlid; + if (msl != ibp->sm_sl) + ibp->sm_ah->attr.sl = msl; + } + spin_unlock_irqrestore(&ibp->lock, flags); + if (smlid != ibp->sm_lid) + ibp->sm_lid = smlid; + if (msl != ibp->sm_sl) + ibp->sm_sl = msl; + event.event = IB_EVENT_SM_CHANGE; + ib_dispatch_event(&event); + } + + /* Allow 1x or 4x to be set (see 14.2.6.6). */ + lwe = pip->link_width_enabled; + if (lwe) { + if (lwe == 0xFF) + set_link_width_enabled(ppd, ppd->link_width_supported); + else if (lwe >= 16 || (lwe & ~ppd->link_width_supported)) + smp->status |= IB_SMP_INVALID_FIELD; + else if (lwe != ppd->link_width_enabled) + set_link_width_enabled(ppd, lwe); + } + + lse = pip->linkspeedactive_enabled & 0xF; + if (lse) { + /* + * The IB 1.2 spec. only allows link speed values + * 1, 3, 5, 7, 15. 1.2.1 extended to allow specific + * speeds. + */ + if (lse == 15) + set_link_speed_enabled(ppd, + ppd->link_speed_supported); + else if (lse >= 8 || (lse & ~ppd->link_speed_supported)) + smp->status |= IB_SMP_INVALID_FIELD; + else if (lse != ppd->link_speed_enabled) + set_link_speed_enabled(ppd, lse); + } + + /* Set link down default state. */ + switch (pip->portphysstate_linkdown & 0xF) { + case 0: /* NOP */ + break; + case 1: /* SLEEP */ + (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LINKDEFAULT, + IB_LINKINITCMD_SLEEP); + break; + case 2: /* POLL */ + (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LINKDEFAULT, + IB_LINKINITCMD_POLL); + break; + default: + smp->status |= IB_SMP_INVALID_FIELD; + } + + ibp->mkeyprot = pip->mkeyprot_resv_lmc >> 6; + ibp->vl_high_limit = pip->vl_high_limit; + (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_LIMIT, + ibp->vl_high_limit); + + mtu = ib_mtu_enum_to_int((pip->neighbormtu_mastersmsl >> 4) & 0xF); + if (mtu == -1) + smp->status |= IB_SMP_INVALID_FIELD; + else + qib_set_mtu(ppd, mtu); + + /* Set operational VLs */ + vls = (pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF; + if (vls) { + if (vls > ppd->vls_supported) + smp->status |= IB_SMP_INVALID_FIELD; + else + (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_OP_VLS, vls); + } + + if (pip->mkey_violations == 0) + ibp->mkey_violations = 0; + + if (pip->pkey_violations == 0) + ibp->pkey_violations = 0; + + if (pip->qkey_violations == 0) + ibp->qkey_violations = 0; + + ore = pip->localphyerrors_overrunerrors; + if (set_phyerrthreshold(ppd, (ore >> 4) & 0xF)) + smp->status |= IB_SMP_INVALID_FIELD; + + if (set_overrunthreshold(ppd, (ore & 0xF))) + smp->status |= IB_SMP_INVALID_FIELD; + + ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; + + if (pip->clientrereg_resv_subnetto & 0x80) { + clientrereg = 1; + event.event = IB_EVENT_CLIENT_REREGISTER; + ib_dispatch_event(&event); + } + + /* + * Do the port state change now that the other link parameters + * have been set. + * Changing the port physical state only makes sense if the link + * is down or is being set to down. + */ + state = pip->linkspeed_portstate & 0xF; + lstate = (pip->portphysstate_linkdown >> 4) & 0xF; + if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP)) + smp->status |= IB_SMP_INVALID_FIELD; + + /* + * Only state changes of DOWN, ARM, and ACTIVE are valid + * and must be in the correct state to take effect (see 7.2.6). + */ + switch (state) { + case IB_PORT_NOP: + if (lstate == 0) + break; + /* FALLTHROUGH */ + case IB_PORT_DOWN: + if (lstate == 0) + lstate = QIB_IB_LINKDOWN_ONLY; + else if (lstate == 1) + lstate = QIB_IB_LINKDOWN_SLEEP; + else if (lstate == 2) + lstate = QIB_IB_LINKDOWN; + else if (lstate == 3) + lstate = QIB_IB_LINKDOWN_DISABLE; + else { + smp->status |= IB_SMP_INVALID_FIELD; + break; + } + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_LINKV; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + qib_set_linkstate(ppd, lstate); + /* + * Don't send a reply if the response would be sent + * through the disabled port. + */ + if (lstate == QIB_IB_LINKDOWN_DISABLE && smp->hop_cnt) { + ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + goto done; + } + qib_wait_linkstate(ppd, QIBL_LINKV, 10); + break; + case IB_PORT_ARMED: + qib_set_linkstate(ppd, QIB_IB_LINKARM); + break; + case IB_PORT_ACTIVE: + qib_set_linkstate(ppd, QIB_IB_LINKACTIVE); + break; + default: + smp->status |= IB_SMP_INVALID_FIELD; + } + + ret = subn_get_portinfo(smp, ibdev, port); + + if (clientrereg) + pip->clientrereg_resv_subnetto |= 0x80; + + goto get_only; + +err: + smp->status |= IB_SMP_INVALID_FIELD; +get_only: + ret = subn_get_portinfo(smp, ibdev, port); +done: + return ret; +} + +/** + * rm_pkey - decrecment the reference count for the given PKEY + * @dd: the qlogic_ib device + * @key: the PKEY index + * + * Return true if this was the last reference and the hardware table entry + * needs to be changed. + */ +static int rm_pkey(struct qib_pportdata *ppd, u16 key) +{ + int i; + int ret; + + for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) { + if (ppd->pkeys[i] != key) + continue; + if (atomic_dec_and_test(&ppd->pkeyrefs[i])) { + ppd->pkeys[i] = 0; + ret = 1; + goto bail; + } + break; + } + + ret = 0; + +bail: + return ret; +} + +/** + * add_pkey - add the given PKEY to the hardware table + * @dd: the qlogic_ib device + * @key: the PKEY + * + * Return an error code if unable to add the entry, zero if no change, + * or 1 if the hardware PKEY register needs to be updated. + */ +static int add_pkey(struct qib_pportdata *ppd, u16 key) +{ + int i; + u16 lkey = key & 0x7FFF; + int any = 0; + int ret; + + if (lkey == 0x7FFF) { + ret = 0; + goto bail; + } + + /* Look for an empty slot or a matching PKEY. */ + for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) { + if (!ppd->pkeys[i]) { + any++; + continue; + } + /* If it matches exactly, try to increment the ref count */ + if (ppd->pkeys[i] == key) { + if (atomic_inc_return(&ppd->pkeyrefs[i]) > 1) { + ret = 0; + goto bail; + } + /* Lost the race. Look for an empty slot below. */ + atomic_dec(&ppd->pkeyrefs[i]); + any++; + } + /* + * It makes no sense to have both the limited and unlimited + * PKEY set at the same time since the unlimited one will + * disable the limited one. + */ + if ((ppd->pkeys[i] & 0x7FFF) == lkey) { + ret = -EEXIST; + goto bail; + } + } + if (!any) { + ret = -EBUSY; + goto bail; + } + for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) { + if (!ppd->pkeys[i] && + atomic_inc_return(&ppd->pkeyrefs[i]) == 1) { + /* for qibstats, etc. */ + ppd->pkeys[i] = key; + ret = 1; + goto bail; + } + } + ret = -EBUSY; + +bail: + return ret; +} + +/** + * set_pkeys - set the PKEY table for ctxt 0 + * @dd: the qlogic_ib device + * @port: the IB port number + * @pkeys: the PKEY table + */ +static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys) +{ + struct qib_pportdata *ppd; + struct qib_ctxtdata *rcd; + int i; + int changed = 0; + + /* + * IB port one/two always maps to context zero/one, + * always a kernel context, no locking needed + * If we get here with ppd setup, no need to check + * that rcd is valid. + */ + ppd = dd->pport + (port - 1); + rcd = dd->rcd[ppd->hw_pidx]; + + for (i = 0; i < ARRAY_SIZE(rcd->pkeys); i++) { + u16 key = pkeys[i]; + u16 okey = rcd->pkeys[i]; + + if (key == okey) + continue; + /* + * The value of this PKEY table entry is changing. + * Remove the old entry in the hardware's array of PKEYs. + */ + if (okey & 0x7FFF) + changed |= rm_pkey(ppd, okey); + if (key & 0x7FFF) { + int ret = add_pkey(ppd, key); + + if (ret < 0) + key = 0; + else + changed |= ret; + } + rcd->pkeys[i] = key; + } + if (changed) { + struct ib_event event; + + (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0); + + event.event = IB_EVENT_PKEY_CHANGE; + event.device = &dd->verbs_dev.ibdev; + event.element.port_num = 1; + ib_dispatch_event(&event); + } + return 0; +} + +static int subn_set_pkeytable(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff); + __be16 *p = (__be16 *) smp->data; + u16 *q = (u16 *) smp->data; + struct qib_devdata *dd = dd_from_ibdev(ibdev); + unsigned i, n = qib_get_npkeys(dd); + + for (i = 0; i < n; i++) + q[i] = be16_to_cpu(p[i]); + + if (startpx != 0 || set_pkeys(dd, port, q) != 0) + smp->status |= IB_SMP_INVALID_FIELD; + + return subn_get_pkeytable(smp, ibdev, port); +} + +static int subn_get_sl_to_vl(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + struct qib_ibport *ibp = to_iport(ibdev, port); + u8 *p = (u8 *) smp->data; + unsigned i; + + memset(smp->data, 0, sizeof(smp->data)); + + if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP)) + smp->status |= IB_SMP_UNSUP_METHOD; + else + for (i = 0; i < ARRAY_SIZE(ibp->sl_to_vl); i += 2) + *p++ = (ibp->sl_to_vl[i] << 4) | ibp->sl_to_vl[i + 1]; + + return reply(smp); +} + +static int subn_set_sl_to_vl(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + struct qib_ibport *ibp = to_iport(ibdev, port); + u8 *p = (u8 *) smp->data; + unsigned i; + + if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP)) { + smp->status |= IB_SMP_UNSUP_METHOD; + return reply(smp); + } + + for (i = 0; i < ARRAY_SIZE(ibp->sl_to_vl); i += 2, p++) { + ibp->sl_to_vl[i] = *p >> 4; + ibp->sl_to_vl[i + 1] = *p & 0xF; + } + qib_set_uevent_bits(ppd_from_ibp(to_iport(ibdev, port)), + _QIB_EVENT_SL2VL_CHANGE_BIT); + + return subn_get_sl_to_vl(smp, ibdev, port); +} + +static int subn_get_vl_arb(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + unsigned which = be32_to_cpu(smp->attr_mod) >> 16; + struct qib_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port)); + + memset(smp->data, 0, sizeof(smp->data)); + + if (ppd->vls_supported == IB_VL_VL0) + smp->status |= IB_SMP_UNSUP_METHOD; + else if (which == IB_VLARB_LOWPRI_0_31) + (void) ppd->dd->f_get_ib_table(ppd, QIB_IB_TBL_VL_LOW_ARB, + smp->data); + else if (which == IB_VLARB_HIGHPRI_0_31) + (void) ppd->dd->f_get_ib_table(ppd, QIB_IB_TBL_VL_HIGH_ARB, + smp->data); + else + smp->status |= IB_SMP_INVALID_FIELD; + + return reply(smp); +} + +static int subn_set_vl_arb(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + unsigned which = be32_to_cpu(smp->attr_mod) >> 16; + struct qib_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port)); + + if (ppd->vls_supported == IB_VL_VL0) + smp->status |= IB_SMP_UNSUP_METHOD; + else if (which == IB_VLARB_LOWPRI_0_31) + (void) ppd->dd->f_set_ib_table(ppd, QIB_IB_TBL_VL_LOW_ARB, + smp->data); + else if (which == IB_VLARB_HIGHPRI_0_31) + (void) ppd->dd->f_set_ib_table(ppd, QIB_IB_TBL_VL_HIGH_ARB, + smp->data); + else + smp->status |= IB_SMP_INVALID_FIELD; + + return subn_get_vl_arb(smp, ibdev, port); +} + +static int subn_trap_repress(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + /* + * For now, we only send the trap once so no need to process this. + * o13-6, o13-7, + * o14-3.a4 The SMA shall not send any message in response to a valid + * SubnTrapRepress() message. + */ + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; +} + +static int pma_get_classportinfo(struct ib_pma_mad *pmp, + struct ib_device *ibdev) +{ + struct ib_class_port_info *p = + (struct ib_class_port_info *)pmp->data; + struct qib_devdata *dd = dd_from_ibdev(ibdev); + + memset(pmp->data, 0, sizeof(pmp->data)); + + if (pmp->mad_hdr.attr_mod != 0) + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + + /* Note that AllPortSelect is not valid */ + p->base_version = 1; + p->class_version = 1; + p->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH; + /* + * Set the most significant bit of CM2 to indicate support for + * congestion statistics + */ + p->reserved[0] = dd->psxmitwait_supported << 7; + /* + * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec. + */ + p->resp_time_value = 18; + + return reply((struct ib_smp *) pmp); +} + +static int pma_get_portsamplescontrol(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portsamplescontrol *p = + (struct ib_pma_portsamplescontrol *)pmp->data; + struct qib_ibdev *dev = to_idev(ibdev); + struct qib_devdata *dd = dd_from_dev(dev); + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + unsigned long flags; + u8 port_select = p->port_select; + + memset(pmp->data, 0, sizeof(pmp->data)); + + p->port_select = port_select; + if (pmp->mad_hdr.attr_mod != 0 || port_select != port) { + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + goto bail; + } + spin_lock_irqsave(&ibp->lock, flags); + p->tick = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_PMA_TICKS); + p->sample_status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT); + p->counter_width = 4; /* 32 bit counters */ + p->counter_mask0_9 = COUNTER_MASK0_9; + p->sample_start = cpu_to_be32(ibp->pma_sample_start); + p->sample_interval = cpu_to_be32(ibp->pma_sample_interval); + p->tag = cpu_to_be16(ibp->pma_tag); + p->counter_select[0] = ibp->pma_counter_select[0]; + p->counter_select[1] = ibp->pma_counter_select[1]; + p->counter_select[2] = ibp->pma_counter_select[2]; + p->counter_select[3] = ibp->pma_counter_select[3]; + p->counter_select[4] = ibp->pma_counter_select[4]; + spin_unlock_irqrestore(&ibp->lock, flags); + +bail: + return reply((struct ib_smp *) pmp); +} + +static int pma_set_portsamplescontrol(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portsamplescontrol *p = + (struct ib_pma_portsamplescontrol *)pmp->data; + struct qib_ibdev *dev = to_idev(ibdev); + struct qib_devdata *dd = dd_from_dev(dev); + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + unsigned long flags; + u8 status, xmit_flags; + int ret; + + if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) { + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + ret = reply((struct ib_smp *) pmp); + goto bail; + } + + spin_lock_irqsave(&ibp->lock, flags); + + /* Port Sampling code owns the PS* HW counters */ + xmit_flags = ppd->cong_stats.flags; + ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_SAMPLE; + status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT); + if (status == IB_PMA_SAMPLE_STATUS_DONE || + (status == IB_PMA_SAMPLE_STATUS_RUNNING && + xmit_flags == IB_PMA_CONG_HW_CONTROL_TIMER)) { + ibp->pma_sample_start = be32_to_cpu(p->sample_start); + ibp->pma_sample_interval = be32_to_cpu(p->sample_interval); + ibp->pma_tag = be16_to_cpu(p->tag); + ibp->pma_counter_select[0] = p->counter_select[0]; + ibp->pma_counter_select[1] = p->counter_select[1]; + ibp->pma_counter_select[2] = p->counter_select[2]; + ibp->pma_counter_select[3] = p->counter_select[3]; + ibp->pma_counter_select[4] = p->counter_select[4]; + dd->f_set_cntr_sample(ppd, ibp->pma_sample_interval, + ibp->pma_sample_start); + } + spin_unlock_irqrestore(&ibp->lock, flags); + + ret = pma_get_portsamplescontrol(pmp, ibdev, port); + +bail: + return ret; +} + +static u64 get_counter(struct qib_ibport *ibp, struct qib_pportdata *ppd, + __be16 sel) +{ + u64 ret; + + switch (sel) { + case IB_PMA_PORT_XMIT_DATA: + ret = ppd->dd->f_portcntr(ppd, QIBPORTCNTR_PSXMITDATA); + break; + case IB_PMA_PORT_RCV_DATA: + ret = ppd->dd->f_portcntr(ppd, QIBPORTCNTR_PSRCVDATA); + break; + case IB_PMA_PORT_XMIT_PKTS: + ret = ppd->dd->f_portcntr(ppd, QIBPORTCNTR_PSXMITPKTS); + break; + case IB_PMA_PORT_RCV_PKTS: + ret = ppd->dd->f_portcntr(ppd, QIBPORTCNTR_PSRCVPKTS); + break; + case IB_PMA_PORT_XMIT_WAIT: + ret = ppd->dd->f_portcntr(ppd, QIBPORTCNTR_PSXMITWAIT); + break; + default: + ret = 0; + } + + return ret; +} + +/* This function assumes that the xmit_wait lock is already held */ +static u64 xmit_wait_get_value_delta(struct qib_pportdata *ppd) +{ + u32 delta; + + delta = get_counter(&ppd->ibport_data, ppd, + IB_PMA_PORT_XMIT_WAIT); + return ppd->cong_stats.counter + delta; +} + +static void cache_hw_sample_counters(struct qib_pportdata *ppd) +{ + struct qib_ibport *ibp = &ppd->ibport_data; + + ppd->cong_stats.counter_cache.psxmitdata = + get_counter(ibp, ppd, IB_PMA_PORT_XMIT_DATA); + ppd->cong_stats.counter_cache.psrcvdata = + get_counter(ibp, ppd, IB_PMA_PORT_RCV_DATA); + ppd->cong_stats.counter_cache.psxmitpkts = + get_counter(ibp, ppd, IB_PMA_PORT_XMIT_PKTS); + ppd->cong_stats.counter_cache.psrcvpkts = + get_counter(ibp, ppd, IB_PMA_PORT_RCV_PKTS); + ppd->cong_stats.counter_cache.psxmitwait = + get_counter(ibp, ppd, IB_PMA_PORT_XMIT_WAIT); +} + +static u64 get_cache_hw_sample_counters(struct qib_pportdata *ppd, + __be16 sel) +{ + u64 ret; + + switch (sel) { + case IB_PMA_PORT_XMIT_DATA: + ret = ppd->cong_stats.counter_cache.psxmitdata; + break; + case IB_PMA_PORT_RCV_DATA: + ret = ppd->cong_stats.counter_cache.psrcvdata; + break; + case IB_PMA_PORT_XMIT_PKTS: + ret = ppd->cong_stats.counter_cache.psxmitpkts; + break; + case IB_PMA_PORT_RCV_PKTS: + ret = ppd->cong_stats.counter_cache.psrcvpkts; + break; + case IB_PMA_PORT_XMIT_WAIT: + ret = ppd->cong_stats.counter_cache.psxmitwait; + break; + default: + ret = 0; + } + + return ret; +} + +static int pma_get_portsamplesresult(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portsamplesresult *p = + (struct ib_pma_portsamplesresult *)pmp->data; + struct qib_ibdev *dev = to_idev(ibdev); + struct qib_devdata *dd = dd_from_dev(dev); + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + unsigned long flags; + u8 status; + int i; + + memset(pmp->data, 0, sizeof(pmp->data)); + spin_lock_irqsave(&ibp->lock, flags); + p->tag = cpu_to_be16(ibp->pma_tag); + if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER) + p->sample_status = IB_PMA_SAMPLE_STATUS_DONE; + else { + status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT); + p->sample_status = cpu_to_be16(status); + if (status == IB_PMA_SAMPLE_STATUS_DONE) { + cache_hw_sample_counters(ppd); + ppd->cong_stats.counter = + xmit_wait_get_value_delta(ppd); + dd->f_set_cntr_sample(ppd, + QIB_CONG_TIMER_PSINTERVAL, 0); + ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER; + } + } + for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++) + p->counter[i] = cpu_to_be32( + get_cache_hw_sample_counters( + ppd, ibp->pma_counter_select[i])); + spin_unlock_irqrestore(&ibp->lock, flags); + + return reply((struct ib_smp *) pmp); +} + +static int pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portsamplesresult_ext *p = + (struct ib_pma_portsamplesresult_ext *)pmp->data; + struct qib_ibdev *dev = to_idev(ibdev); + struct qib_devdata *dd = dd_from_dev(dev); + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + unsigned long flags; + u8 status; + int i; + + /* Port Sampling code owns the PS* HW counters */ + memset(pmp->data, 0, sizeof(pmp->data)); + spin_lock_irqsave(&ibp->lock, flags); + p->tag = cpu_to_be16(ibp->pma_tag); + if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER) + p->sample_status = IB_PMA_SAMPLE_STATUS_DONE; + else { + status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT); + p->sample_status = cpu_to_be16(status); + /* 64 bits */ + p->extended_width = cpu_to_be32(0x80000000); + if (status == IB_PMA_SAMPLE_STATUS_DONE) { + cache_hw_sample_counters(ppd); + ppd->cong_stats.counter = + xmit_wait_get_value_delta(ppd); + dd->f_set_cntr_sample(ppd, + QIB_CONG_TIMER_PSINTERVAL, 0); + ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER; + } + } + for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++) + p->counter[i] = cpu_to_be64( + get_cache_hw_sample_counters( + ppd, ibp->pma_counter_select[i])); + spin_unlock_irqrestore(&ibp->lock, flags); + + return reply((struct ib_smp *) pmp); +} + +static int pma_get_portcounters(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) + pmp->data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_verbs_counters cntrs; + u8 port_select = p->port_select; + + qib_get_counters(ppd, &cntrs); + + /* Adjust counters for any resets done. */ + cntrs.symbol_error_counter -= ibp->z_symbol_error_counter; + cntrs.link_error_recovery_counter -= + ibp->z_link_error_recovery_counter; + cntrs.link_downed_counter -= ibp->z_link_downed_counter; + cntrs.port_rcv_errors -= ibp->z_port_rcv_errors; + cntrs.port_rcv_remphys_errors -= ibp->z_port_rcv_remphys_errors; + cntrs.port_xmit_discards -= ibp->z_port_xmit_discards; + cntrs.port_xmit_data -= ibp->z_port_xmit_data; + cntrs.port_rcv_data -= ibp->z_port_rcv_data; + cntrs.port_xmit_packets -= ibp->z_port_xmit_packets; + cntrs.port_rcv_packets -= ibp->z_port_rcv_packets; + cntrs.local_link_integrity_errors -= + ibp->z_local_link_integrity_errors; + cntrs.excessive_buffer_overrun_errors -= + ibp->z_excessive_buffer_overrun_errors; + cntrs.vl15_dropped -= ibp->z_vl15_dropped; + cntrs.vl15_dropped += ibp->n_vl15_dropped; + + memset(pmp->data, 0, sizeof(pmp->data)); + + p->port_select = port_select; + if (pmp->mad_hdr.attr_mod != 0 || port_select != port) + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + + if (cntrs.symbol_error_counter > 0xFFFFUL) + p->symbol_error_counter = cpu_to_be16(0xFFFF); + else + p->symbol_error_counter = + cpu_to_be16((u16)cntrs.symbol_error_counter); + if (cntrs.link_error_recovery_counter > 0xFFUL) + p->link_error_recovery_counter = 0xFF; + else + p->link_error_recovery_counter = + (u8)cntrs.link_error_recovery_counter; + if (cntrs.link_downed_counter > 0xFFUL) + p->link_downed_counter = 0xFF; + else + p->link_downed_counter = (u8)cntrs.link_downed_counter; + if (cntrs.port_rcv_errors > 0xFFFFUL) + p->port_rcv_errors = cpu_to_be16(0xFFFF); + else + p->port_rcv_errors = + cpu_to_be16((u16) cntrs.port_rcv_errors); + if (cntrs.port_rcv_remphys_errors > 0xFFFFUL) + p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF); + else + p->port_rcv_remphys_errors = + cpu_to_be16((u16)cntrs.port_rcv_remphys_errors); + if (cntrs.port_xmit_discards > 0xFFFFUL) + p->port_xmit_discards = cpu_to_be16(0xFFFF); + else + p->port_xmit_discards = + cpu_to_be16((u16)cntrs.port_xmit_discards); + if (cntrs.local_link_integrity_errors > 0xFUL) + cntrs.local_link_integrity_errors = 0xFUL; + if (cntrs.excessive_buffer_overrun_errors > 0xFUL) + cntrs.excessive_buffer_overrun_errors = 0xFUL; + p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) | + cntrs.excessive_buffer_overrun_errors; + if (cntrs.vl15_dropped > 0xFFFFUL) + p->vl15_dropped = cpu_to_be16(0xFFFF); + else + p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped); + if (cntrs.port_xmit_data > 0xFFFFFFFFUL) + p->port_xmit_data = cpu_to_be32(0xFFFFFFFF); + else + p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data); + if (cntrs.port_rcv_data > 0xFFFFFFFFUL) + p->port_rcv_data = cpu_to_be32(0xFFFFFFFF); + else + p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data); + if (cntrs.port_xmit_packets > 0xFFFFFFFFUL) + p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF); + else + p->port_xmit_packets = + cpu_to_be32((u32)cntrs.port_xmit_packets); + if (cntrs.port_rcv_packets > 0xFFFFFFFFUL) + p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF); + else + p->port_rcv_packets = + cpu_to_be32((u32) cntrs.port_rcv_packets); + + return reply((struct ib_smp *) pmp); +} + +static int pma_get_portcounters_cong(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + /* Congestion PMA packets start at offset 24 not 64 */ + struct ib_pma_portcounters_cong *p = + (struct ib_pma_portcounters_cong *)pmp->reserved; + struct qib_verbs_counters cntrs; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_devdata *dd = dd_from_ppd(ppd); + u32 port_select = be32_to_cpu(pmp->mad_hdr.attr_mod) & 0xFF; + u64 xmit_wait_counter; + unsigned long flags; + + /* + * This check is performed only in the GET method because the + * SET method ends up calling this anyway. + */ + if (!dd->psxmitwait_supported) + pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; + if (port_select != port) + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + + qib_get_counters(ppd, &cntrs); + spin_lock_irqsave(&ppd->ibport_data.lock, flags); + xmit_wait_counter = xmit_wait_get_value_delta(ppd); + spin_unlock_irqrestore(&ppd->ibport_data.lock, flags); + + /* Adjust counters for any resets done. */ + cntrs.symbol_error_counter -= ibp->z_symbol_error_counter; + cntrs.link_error_recovery_counter -= + ibp->z_link_error_recovery_counter; + cntrs.link_downed_counter -= ibp->z_link_downed_counter; + cntrs.port_rcv_errors -= ibp->z_port_rcv_errors; + cntrs.port_rcv_remphys_errors -= + ibp->z_port_rcv_remphys_errors; + cntrs.port_xmit_discards -= ibp->z_port_xmit_discards; + cntrs.local_link_integrity_errors -= + ibp->z_local_link_integrity_errors; + cntrs.excessive_buffer_overrun_errors -= + ibp->z_excessive_buffer_overrun_errors; + cntrs.vl15_dropped -= ibp->z_vl15_dropped; + cntrs.vl15_dropped += ibp->n_vl15_dropped; + cntrs.port_xmit_data -= ibp->z_port_xmit_data; + cntrs.port_rcv_data -= ibp->z_port_rcv_data; + cntrs.port_xmit_packets -= ibp->z_port_xmit_packets; + cntrs.port_rcv_packets -= ibp->z_port_rcv_packets; + + memset(pmp->reserved, 0, sizeof(pmp->reserved) + + sizeof(pmp->data)); + + /* + * Set top 3 bits to indicate interval in picoseconds in + * remaining bits. + */ + p->port_check_rate = + cpu_to_be16((QIB_XMIT_RATE_PICO << 13) | + (dd->psxmitwait_check_rate & + ~(QIB_XMIT_RATE_PICO << 13))); + p->port_adr_events = cpu_to_be64(0); + p->port_xmit_wait = cpu_to_be64(xmit_wait_counter); + p->port_xmit_data = cpu_to_be64(cntrs.port_xmit_data); + p->port_rcv_data = cpu_to_be64(cntrs.port_rcv_data); + p->port_xmit_packets = + cpu_to_be64(cntrs.port_xmit_packets); + p->port_rcv_packets = + cpu_to_be64(cntrs.port_rcv_packets); + if (cntrs.symbol_error_counter > 0xFFFFUL) + p->symbol_error_counter = cpu_to_be16(0xFFFF); + else + p->symbol_error_counter = + cpu_to_be16( + (u16)cntrs.symbol_error_counter); + if (cntrs.link_error_recovery_counter > 0xFFUL) + p->link_error_recovery_counter = 0xFF; + else + p->link_error_recovery_counter = + (u8)cntrs.link_error_recovery_counter; + if (cntrs.link_downed_counter > 0xFFUL) + p->link_downed_counter = 0xFF; + else + p->link_downed_counter = + (u8)cntrs.link_downed_counter; + if (cntrs.port_rcv_errors > 0xFFFFUL) + p->port_rcv_errors = cpu_to_be16(0xFFFF); + else + p->port_rcv_errors = + cpu_to_be16((u16) cntrs.port_rcv_errors); + if (cntrs.port_rcv_remphys_errors > 0xFFFFUL) + p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF); + else + p->port_rcv_remphys_errors = + cpu_to_be16( + (u16)cntrs.port_rcv_remphys_errors); + if (cntrs.port_xmit_discards > 0xFFFFUL) + p->port_xmit_discards = cpu_to_be16(0xFFFF); + else + p->port_xmit_discards = + cpu_to_be16((u16)cntrs.port_xmit_discards); + if (cntrs.local_link_integrity_errors > 0xFUL) + cntrs.local_link_integrity_errors = 0xFUL; + if (cntrs.excessive_buffer_overrun_errors > 0xFUL) + cntrs.excessive_buffer_overrun_errors = 0xFUL; + p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) | + cntrs.excessive_buffer_overrun_errors; + if (cntrs.vl15_dropped > 0xFFFFUL) + p->vl15_dropped = cpu_to_be16(0xFFFF); + else + p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped); + + return reply((struct ib_smp *)pmp); +} + +static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portcounters_ext *p = + (struct ib_pma_portcounters_ext *)pmp->data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + u64 swords, rwords, spkts, rpkts, xwait; + u8 port_select = p->port_select; + + memset(pmp->data, 0, sizeof(pmp->data)); + + p->port_select = port_select; + if (pmp->mad_hdr.attr_mod != 0 || port_select != port) { + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + goto bail; + } + + qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait); + + /* Adjust counters for any resets done. */ + swords -= ibp->z_port_xmit_data; + rwords -= ibp->z_port_rcv_data; + spkts -= ibp->z_port_xmit_packets; + rpkts -= ibp->z_port_rcv_packets; + + p->port_xmit_data = cpu_to_be64(swords); + p->port_rcv_data = cpu_to_be64(rwords); + p->port_xmit_packets = cpu_to_be64(spkts); + p->port_rcv_packets = cpu_to_be64(rpkts); + p->port_unicast_xmit_packets = cpu_to_be64(ibp->n_unicast_xmit); + p->port_unicast_rcv_packets = cpu_to_be64(ibp->n_unicast_rcv); + p->port_multicast_xmit_packets = cpu_to_be64(ibp->n_multicast_xmit); + p->port_multicast_rcv_packets = cpu_to_be64(ibp->n_multicast_rcv); + +bail: + return reply((struct ib_smp *) pmp); +} + +static int pma_set_portcounters(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) + pmp->data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_verbs_counters cntrs; + + /* + * Since the HW doesn't support clearing counters, we save the + * current count and subtract it from future responses. + */ + qib_get_counters(ppd, &cntrs); + + if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR) + ibp->z_symbol_error_counter = cntrs.symbol_error_counter; + + if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY) + ibp->z_link_error_recovery_counter = + cntrs.link_error_recovery_counter; + + if (p->counter_select & IB_PMA_SEL_LINK_DOWNED) + ibp->z_link_downed_counter = cntrs.link_downed_counter; + + if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS) + ibp->z_port_rcv_errors = cntrs.port_rcv_errors; + + if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS) + ibp->z_port_rcv_remphys_errors = + cntrs.port_rcv_remphys_errors; + + if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS) + ibp->z_port_xmit_discards = cntrs.port_xmit_discards; + + if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS) + ibp->z_local_link_integrity_errors = + cntrs.local_link_integrity_errors; + + if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS) + ibp->z_excessive_buffer_overrun_errors = + cntrs.excessive_buffer_overrun_errors; + + if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) { + ibp->n_vl15_dropped = 0; + ibp->z_vl15_dropped = cntrs.vl15_dropped; + } + + if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA) + ibp->z_port_xmit_data = cntrs.port_xmit_data; + + if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA) + ibp->z_port_rcv_data = cntrs.port_rcv_data; + + if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS) + ibp->z_port_xmit_packets = cntrs.port_xmit_packets; + + if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS) + ibp->z_port_rcv_packets = cntrs.port_rcv_packets; + + return pma_get_portcounters(pmp, ibdev, port); +} + +static int pma_set_portcounters_cong(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_devdata *dd = dd_from_ppd(ppd); + struct qib_verbs_counters cntrs; + u32 counter_select = (be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24) & 0xFF; + int ret = 0; + unsigned long flags; + + qib_get_counters(ppd, &cntrs); + /* Get counter values before we save them */ + ret = pma_get_portcounters_cong(pmp, ibdev, port); + + if (counter_select & IB_PMA_SEL_CONG_XMIT) { + spin_lock_irqsave(&ppd->ibport_data.lock, flags); + ppd->cong_stats.counter = 0; + dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL, + 0x0); + spin_unlock_irqrestore(&ppd->ibport_data.lock, flags); + } + if (counter_select & IB_PMA_SEL_CONG_PORT_DATA) { + ibp->z_port_xmit_data = cntrs.port_xmit_data; + ibp->z_port_rcv_data = cntrs.port_rcv_data; + ibp->z_port_xmit_packets = cntrs.port_xmit_packets; + ibp->z_port_rcv_packets = cntrs.port_rcv_packets; + } + if (counter_select & IB_PMA_SEL_CONG_ALL) { + ibp->z_symbol_error_counter = + cntrs.symbol_error_counter; + ibp->z_link_error_recovery_counter = + cntrs.link_error_recovery_counter; + ibp->z_link_downed_counter = + cntrs.link_downed_counter; + ibp->z_port_rcv_errors = cntrs.port_rcv_errors; + ibp->z_port_rcv_remphys_errors = + cntrs.port_rcv_remphys_errors; + ibp->z_port_xmit_discards = + cntrs.port_xmit_discards; + ibp->z_local_link_integrity_errors = + cntrs.local_link_integrity_errors; + ibp->z_excessive_buffer_overrun_errors = + cntrs.excessive_buffer_overrun_errors; + ibp->n_vl15_dropped = 0; + ibp->z_vl15_dropped = cntrs.vl15_dropped; + } + + return ret; +} + +static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) + pmp->data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + u64 swords, rwords, spkts, rpkts, xwait; + + qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait); + + if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA) + ibp->z_port_xmit_data = swords; + + if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA) + ibp->z_port_rcv_data = rwords; + + if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS) + ibp->z_port_xmit_packets = spkts; + + if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS) + ibp->z_port_rcv_packets = rpkts; + + if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS) + ibp->n_unicast_xmit = 0; + + if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS) + ibp->n_unicast_rcv = 0; + + if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS) + ibp->n_multicast_xmit = 0; + + if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS) + ibp->n_multicast_rcv = 0; + + return pma_get_portcounters_ext(pmp, ibdev, port); +} + +static int process_subn(struct ib_device *ibdev, int mad_flags, + u8 port, struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + struct ib_smp *smp = (struct ib_smp *)out_mad; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + int ret; + + *out_mad = *in_mad; + if (smp->class_version != 1) { + smp->status |= IB_SMP_UNSUP_VERSION; + ret = reply(smp); + goto bail; + } + + ret = check_mkey(ibp, smp, mad_flags); + if (ret) { + u32 port_num = be32_to_cpu(smp->attr_mod); + + /* + * If this is a get/set portinfo, we already check the + * M_Key if the MAD is for another port and the M_Key + * is OK on the receiving port. This check is needed + * to increment the error counters when the M_Key + * fails to match on *both* ports. + */ + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && + (smp->method == IB_MGMT_METHOD_GET || + smp->method == IB_MGMT_METHOD_SET) && + port_num && port_num <= ibdev->phys_port_cnt && + port != port_num) + (void) check_mkey(to_iport(ibdev, port_num), smp, 0); + goto bail; + } + + switch (smp->method) { + case IB_MGMT_METHOD_GET: + switch (smp->attr_id) { + case IB_SMP_ATTR_NODE_DESC: + ret = subn_get_nodedescription(smp, ibdev); + goto bail; + case IB_SMP_ATTR_NODE_INFO: + ret = subn_get_nodeinfo(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_GUID_INFO: + ret = subn_get_guidinfo(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_PORT_INFO: + ret = subn_get_portinfo(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_PKEY_TABLE: + ret = subn_get_pkeytable(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_SL_TO_VL_TABLE: + ret = subn_get_sl_to_vl(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_VL_ARB_TABLE: + ret = subn_get_vl_arb(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_SM_INFO: + if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) { + ret = IB_MAD_RESULT_SUCCESS | + IB_MAD_RESULT_CONSUMED; + goto bail; + } + if (ibp->port_cap_flags & IB_PORT_SM) { + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + } + /* FALLTHROUGH */ + default: + smp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply(smp); + goto bail; + } + + case IB_MGMT_METHOD_SET: + switch (smp->attr_id) { + case IB_SMP_ATTR_GUID_INFO: + ret = subn_set_guidinfo(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_PORT_INFO: + ret = subn_set_portinfo(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_PKEY_TABLE: + ret = subn_set_pkeytable(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_SL_TO_VL_TABLE: + ret = subn_set_sl_to_vl(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_VL_ARB_TABLE: + ret = subn_set_vl_arb(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_SM_INFO: + if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) { + ret = IB_MAD_RESULT_SUCCESS | + IB_MAD_RESULT_CONSUMED; + goto bail; + } + if (ibp->port_cap_flags & IB_PORT_SM) { + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + } + /* FALLTHROUGH */ + default: + smp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply(smp); + goto bail; + } + + case IB_MGMT_METHOD_TRAP_REPRESS: + if (smp->attr_id == IB_SMP_ATTR_NOTICE) + ret = subn_trap_repress(smp, ibdev, port); + else { + smp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply(smp); + } + goto bail; + + case IB_MGMT_METHOD_TRAP: + case IB_MGMT_METHOD_REPORT: + case IB_MGMT_METHOD_REPORT_RESP: + case IB_MGMT_METHOD_GET_RESP: + /* + * The ib_mad module will call us to process responses + * before checking for other consumers. + * Just tell the caller to process it normally. + */ + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + + case IB_MGMT_METHOD_SEND: + if (ib_get_smp_direction(smp) && + smp->attr_id == QIB_VENDOR_IPG) { + ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PORT, + smp->data[0]); + ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + } else + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + + default: + smp->status |= IB_SMP_UNSUP_METHOD; + ret = reply(smp); + } + +bail: + return ret; +} + +static int process_perf(struct ib_device *ibdev, u8 port, + struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad; + int ret; + + *out_mad = *in_mad; + if (pmp->mad_hdr.class_version != 1) { + pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION; + ret = reply((struct ib_smp *) pmp); + goto bail; + } + + switch (pmp->mad_hdr.method) { + case IB_MGMT_METHOD_GET: + switch (pmp->mad_hdr.attr_id) { + case IB_PMA_CLASS_PORT_INFO: + ret = pma_get_classportinfo(pmp, ibdev); + goto bail; + case IB_PMA_PORT_SAMPLES_CONTROL: + ret = pma_get_portsamplescontrol(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_SAMPLES_RESULT: + ret = pma_get_portsamplesresult(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_SAMPLES_RESULT_EXT: + ret = pma_get_portsamplesresult_ext(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_COUNTERS: + ret = pma_get_portcounters(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_COUNTERS_EXT: + ret = pma_get_portcounters_ext(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_COUNTERS_CONG: + ret = pma_get_portcounters_cong(pmp, ibdev, port); + goto bail; + default: + pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) pmp); + goto bail; + } + + case IB_MGMT_METHOD_SET: + switch (pmp->mad_hdr.attr_id) { + case IB_PMA_PORT_SAMPLES_CONTROL: + ret = pma_set_portsamplescontrol(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_COUNTERS: + ret = pma_set_portcounters(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_COUNTERS_EXT: + ret = pma_set_portcounters_ext(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_COUNTERS_CONG: + ret = pma_set_portcounters_cong(pmp, ibdev, port); + goto bail; + default: + pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) pmp); + goto bail; + } + + case IB_MGMT_METHOD_TRAP: + case IB_MGMT_METHOD_GET_RESP: + /* + * The ib_mad module will call us to process responses + * before checking for other consumers. + * Just tell the caller to process it normally. + */ + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + + default: + pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD; + ret = reply((struct ib_smp *) pmp); + } + +bail: + return ret; +} + +/** + * qib_process_mad - process an incoming MAD packet + * @ibdev: the infiniband device this packet came in on + * @mad_flags: MAD flags + * @port: the port number this packet came in on + * @in_wc: the work completion entry for this packet + * @in_grh: the global route header for this packet + * @in_mad: the incoming MAD + * @out_mad: any outgoing MAD reply + * + * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not + * interested in processing. + * + * Note that the verbs framework has already done the MAD sanity checks, + * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE + * MADs. + * + * This is called by the ib_mad module. + */ +int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + int ret; + + switch (in_mad->mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad); + goto bail; + + case IB_MGMT_CLASS_PERF_MGMT: + ret = process_perf(ibdev, port, in_mad, out_mad); + goto bail; + + default: + ret = IB_MAD_RESULT_SUCCESS; + } + +bail: + return ret; +} + +static void send_handler(struct ib_mad_agent *agent, + struct ib_mad_send_wc *mad_send_wc) +{ + ib_free_send_mad(mad_send_wc->send_buf); +} + +static void xmit_wait_timer_func(unsigned long opaque) +{ + struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + struct qib_devdata *dd = dd_from_ppd(ppd); + unsigned long flags; + u8 status; + + spin_lock_irqsave(&ppd->ibport_data.lock, flags); + if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_SAMPLE) { + status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT); + if (status == IB_PMA_SAMPLE_STATUS_DONE) { + /* save counter cache */ + cache_hw_sample_counters(ppd); + ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER; + } else + goto done; + } + ppd->cong_stats.counter = xmit_wait_get_value_delta(ppd); + dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL, 0x0); +done: + spin_unlock_irqrestore(&ppd->ibport_data.lock, flags); + mod_timer(&ppd->cong_stats.timer, jiffies + HZ); +} + +int qib_create_agents(struct qib_ibdev *dev) +{ + struct qib_devdata *dd = dd_from_dev(dev); + struct ib_mad_agent *agent; + struct qib_ibport *ibp; + int p; + int ret; + + for (p = 0; p < dd->num_pports; p++) { + ibp = &dd->pport[p].ibport_data; + agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI, + NULL, 0, send_handler, + NULL, NULL); + if (IS_ERR(agent)) { + ret = PTR_ERR(agent); + goto err; + } + + /* Initialize xmit_wait structure */ + dd->pport[p].cong_stats.counter = 0; + init_timer(&dd->pport[p].cong_stats.timer); + dd->pport[p].cong_stats.timer.function = xmit_wait_timer_func; + dd->pport[p].cong_stats.timer.data = + (unsigned long)(&dd->pport[p]); + dd->pport[p].cong_stats.timer.expires = 0; + add_timer(&dd->pport[p].cong_stats.timer); + + ibp->send_agent = agent; + } + + return 0; + +err: + for (p = 0; p < dd->num_pports; p++) { + ibp = &dd->pport[p].ibport_data; + if (ibp->send_agent) { + agent = ibp->send_agent; + ibp->send_agent = NULL; + ib_unregister_mad_agent(agent); + } + } + + return ret; +} + +void qib_free_agents(struct qib_ibdev *dev) +{ + struct qib_devdata *dd = dd_from_dev(dev); + struct ib_mad_agent *agent; + struct qib_ibport *ibp; + int p; + + for (p = 0; p < dd->num_pports; p++) { + ibp = &dd->pport[p].ibport_data; + if (ibp->send_agent) { + agent = ibp->send_agent; + ibp->send_agent = NULL; + ib_unregister_mad_agent(agent); + } + if (ibp->sm_ah) { + ib_destroy_ah(&ibp->sm_ah->ibah); + ibp->sm_ah = NULL; + } + if (dd->pport[p].cong_stats.timer.data) + del_timer_sync(&dd->pport[p].cong_stats.timer); + } +} diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h new file mode 100644 index 00000000..ecc416cd --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_mad.h @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#define IB_SMP_UNSUP_VERSION cpu_to_be16(0x0004) +#define IB_SMP_UNSUP_METHOD cpu_to_be16(0x0008) +#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C) +#define IB_SMP_INVALID_FIELD cpu_to_be16(0x001C) + +struct ib_node_info { + u8 base_version; + u8 class_version; + u8 node_type; + u8 num_ports; + __be64 sys_guid; + __be64 node_guid; + __be64 port_guid; + __be16 partition_cap; + __be16 device_id; + __be32 revision; + u8 local_port_num; + u8 vendor_id[3]; +} __attribute__ ((packed)); + +struct ib_mad_notice_attr { + u8 generic_type; + u8 prod_type_msb; + __be16 prod_type_lsb; + __be16 trap_num; + __be16 issuer_lid; + __be16 toggle_count; + + union { + struct { + u8 details[54]; + } raw_data; + + struct { + __be16 reserved; + __be16 lid; /* where violation happened */ + u8 port_num; /* where violation happened */ + } __attribute__ ((packed)) ntc_129_131; + + struct { + __be16 reserved; + __be16 lid; /* LID where change occurred */ + u8 reserved2; + u8 local_changes; /* low bit - local changes */ + __be32 new_cap_mask; /* new capability mask */ + u8 reserved3; + u8 change_flags; /* low 3 bits only */ + } __attribute__ ((packed)) ntc_144; + + struct { + __be16 reserved; + __be16 lid; /* lid where sys guid changed */ + __be16 reserved2; + __be64 new_sys_guid; + } __attribute__ ((packed)) ntc_145; + + struct { + __be16 reserved; + __be16 lid; + __be16 dr_slid; + u8 method; + u8 reserved2; + __be16 attr_id; + __be32 attr_mod; + __be64 mkey; + u8 reserved3; + u8 dr_trunc_hop; + u8 dr_rtn_path[30]; + } __attribute__ ((packed)) ntc_256; + + struct { + __be16 reserved; + __be16 lid1; + __be16 lid2; + __be32 key; + __be32 sl_qp1; /* SL: high 4 bits */ + __be32 qp2; /* high 8 bits reserved */ + union ib_gid gid1; + union ib_gid gid2; + } __attribute__ ((packed)) ntc_257_258; + + } details; +}; + +/* + * Generic trap/notice types + */ +#define IB_NOTICE_TYPE_FATAL 0x80 +#define IB_NOTICE_TYPE_URGENT 0x81 +#define IB_NOTICE_TYPE_SECURITY 0x82 +#define IB_NOTICE_TYPE_SM 0x83 +#define IB_NOTICE_TYPE_INFO 0x84 + +/* + * Generic trap/notice producers + */ +#define IB_NOTICE_PROD_CA cpu_to_be16(1) +#define IB_NOTICE_PROD_SWITCH cpu_to_be16(2) +#define IB_NOTICE_PROD_ROUTER cpu_to_be16(3) +#define IB_NOTICE_PROD_CLASS_MGR cpu_to_be16(4) + +/* + * Generic trap/notice numbers + */ +#define IB_NOTICE_TRAP_LLI_THRESH cpu_to_be16(129) +#define IB_NOTICE_TRAP_EBO_THRESH cpu_to_be16(130) +#define IB_NOTICE_TRAP_FLOW_UPDATE cpu_to_be16(131) +#define IB_NOTICE_TRAP_CAP_MASK_CHG cpu_to_be16(144) +#define IB_NOTICE_TRAP_SYS_GUID_CHG cpu_to_be16(145) +#define IB_NOTICE_TRAP_BAD_MKEY cpu_to_be16(256) +#define IB_NOTICE_TRAP_BAD_PKEY cpu_to_be16(257) +#define IB_NOTICE_TRAP_BAD_QKEY cpu_to_be16(258) + +/* + * Repress trap/notice flags + */ +#define IB_NOTICE_REPRESS_LLI_THRESH (1 << 0) +#define IB_NOTICE_REPRESS_EBO_THRESH (1 << 1) +#define IB_NOTICE_REPRESS_FLOW_UPDATE (1 << 2) +#define IB_NOTICE_REPRESS_CAP_MASK_CHG (1 << 3) +#define IB_NOTICE_REPRESS_SYS_GUID_CHG (1 << 4) +#define IB_NOTICE_REPRESS_BAD_MKEY (1 << 5) +#define IB_NOTICE_REPRESS_BAD_PKEY (1 << 6) +#define IB_NOTICE_REPRESS_BAD_QKEY (1 << 7) + +/* + * Generic trap/notice other local changes flags (trap 144). + */ +#define IB_NOTICE_TRAP_LSE_CHG 0x04 /* Link Speed Enable changed */ +#define IB_NOTICE_TRAP_LWE_CHG 0x02 /* Link Width Enable changed */ +#define IB_NOTICE_TRAP_NODE_DESC_CHG 0x01 + +/* + * Generic trap/notice M_Key volation flags in dr_trunc_hop (trap 256). + */ +#define IB_NOTICE_TRAP_DR_NOTICE 0x80 +#define IB_NOTICE_TRAP_DR_TRUNC 0x40 + +struct ib_vl_weight_elem { + u8 vl; /* Only low 4 bits, upper 4 bits reserved */ + u8 weight; +}; + +#define IB_VLARB_LOWPRI_0_31 1 +#define IB_VLARB_LOWPRI_32_63 2 +#define IB_VLARB_HIGHPRI_0_31 3 +#define IB_VLARB_HIGHPRI_32_63 4 + +#define IB_PMA_PORT_COUNTERS_CONG cpu_to_be16(0xFF00) + +struct ib_pma_portcounters_cong { + u8 reserved; + u8 reserved1; + __be16 port_check_rate; + __be16 symbol_error_counter; + u8 link_error_recovery_counter; + u8 link_downed_counter; + __be16 port_rcv_errors; + __be16 port_rcv_remphys_errors; + __be16 port_rcv_switch_relay_errors; + __be16 port_xmit_discards; + u8 port_xmit_constraint_errors; + u8 port_rcv_constraint_errors; + u8 reserved2; + u8 link_overrun_errors; /* LocalLink: 7:4, BufferOverrun: 3:0 */ + __be16 reserved3; + __be16 vl15_dropped; + __be64 port_xmit_data; + __be64 port_rcv_data; + __be64 port_xmit_packets; + __be64 port_rcv_packets; + __be64 port_xmit_wait; + __be64 port_adr_events; +} __attribute__ ((packed)); + +#define IB_PMA_CONG_HW_CONTROL_TIMER 0x00 +#define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01 + +#define QIB_XMIT_RATE_UNSUPPORTED 0x0 +#define QIB_XMIT_RATE_PICO 0x7 +/* number of 4nsec cycles equaling 2secs */ +#define QIB_CONG_TIMER_PSINTERVAL 0x1DCD64EC + +#define IB_PMA_SEL_CONG_ALL 0x01 +#define IB_PMA_SEL_CONG_PORT_DATA 0x02 +#define IB_PMA_SEL_CONG_XMIT 0x04 +#define IB_PMA_SEL_CONG_ROUTING 0x08 + +/* + * The PortSamplesControl.CounterMasks field is an array of 3 bit fields + * which specify the N'th counter's capabilities. See ch. 16.1.3.2. + * We support 5 counters which only count the mandatory quantities. + */ +#define COUNTER_MASK(q, n) (q << ((9 - n) * 3)) +#define COUNTER_MASK0_9 \ + cpu_to_be32(COUNTER_MASK(1, 0) | \ + COUNTER_MASK(1, 1) | \ + COUNTER_MASK(1, 2) | \ + COUNTER_MASK(1, 3) | \ + COUNTER_MASK(1, 4)) diff --git a/drivers/infiniband/hw/qib/qib_mmap.c b/drivers/infiniband/hw/qib/qib_mmap.c new file mode 100644 index 00000000..8b73a11d --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_mmap.c @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "qib_verbs.h" + +/** + * qib_release_mmap_info - free mmap info structure + * @ref: a pointer to the kref within struct qib_mmap_info + */ +void qib_release_mmap_info(struct kref *ref) +{ + struct qib_mmap_info *ip = + container_of(ref, struct qib_mmap_info, ref); + struct qib_ibdev *dev = to_idev(ip->context->device); + + spin_lock_irq(&dev->pending_lock); + list_del(&ip->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + + vfree(ip->obj); + kfree(ip); +} + +/* + * open and close keep track of how many times the CQ is mapped, + * to avoid releasing it. + */ +static void qib_vma_open(struct vm_area_struct *vma) +{ + struct qib_mmap_info *ip = vma->vm_private_data; + + kref_get(&ip->ref); +} + +static void qib_vma_close(struct vm_area_struct *vma) +{ + struct qib_mmap_info *ip = vma->vm_private_data; + + kref_put(&ip->ref, qib_release_mmap_info); +} + +static struct vm_operations_struct qib_vm_ops = { + .open = qib_vma_open, + .close = qib_vma_close, +}; + +/** + * qib_mmap - create a new mmap region + * @context: the IB user context of the process making the mmap() call + * @vma: the VMA to be initialized + * Return zero if the mmap is OK. Otherwise, return an errno. + */ +int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct qib_ibdev *dev = to_idev(context->device); + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long size = vma->vm_end - vma->vm_start; + struct qib_mmap_info *ip, *pp; + int ret = -EINVAL; + + /* + * Search the device's list of objects waiting for a mmap call. + * Normally, this list is very short since a call to create a + * CQ, QP, or SRQ is soon followed by a call to mmap(). + */ + spin_lock_irq(&dev->pending_lock); + list_for_each_entry_safe(ip, pp, &dev->pending_mmaps, + pending_mmaps) { + /* Only the creator is allowed to mmap the object */ + if (context != ip->context || (__u64) offset != ip->offset) + continue; + /* Don't allow a mmap larger than the object. */ + if (size > ip->size) + break; + + list_del_init(&ip->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + + ret = remap_vmalloc_range(vma, ip->obj, 0); + if (ret) + goto done; + vma->vm_ops = &qib_vm_ops; + vma->vm_private_data = ip; + qib_vma_open(vma); + goto done; + } + spin_unlock_irq(&dev->pending_lock); +done: + return ret; +} + +/* + * Allocate information for qib_mmap + */ +struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, + u32 size, + struct ib_ucontext *context, + void *obj) { + struct qib_mmap_info *ip; + + ip = kmalloc(sizeof *ip, GFP_KERNEL); + if (!ip) + goto bail; + + size = PAGE_ALIGN(size); + + spin_lock_irq(&dev->mmap_offset_lock); + if (dev->mmap_offset == 0) + dev->mmap_offset = PAGE_SIZE; + ip->offset = dev->mmap_offset; + dev->mmap_offset += size; + spin_unlock_irq(&dev->mmap_offset_lock); + + INIT_LIST_HEAD(&ip->pending_mmaps); + ip->size = size; + ip->context = context; + ip->obj = obj; + kref_init(&ip->ref); + +bail: + return ip; +} + +void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip, + u32 size, void *obj) +{ + size = PAGE_ALIGN(size); + + spin_lock_irq(&dev->mmap_offset_lock); + if (dev->mmap_offset == 0) + dev->mmap_offset = PAGE_SIZE; + ip->offset = dev->mmap_offset; + dev->mmap_offset += size; + spin_unlock_irq(&dev->mmap_offset_lock); + + ip->size = size; + ip->obj = obj; +} diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c new file mode 100644 index 00000000..08944e2e --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -0,0 +1,505 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "qib.h" + +/* Fast memory region */ +struct qib_fmr { + struct ib_fmr ibfmr; + struct qib_mregion mr; /* must be last */ +}; + +static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct qib_fmr, ibfmr); +} + +/** + * qib_get_dma_mr - get a DMA memory region + * @pd: protection domain for this memory region + * @acc: access flags + * + * Returns the memory region on success, otherwise returns an errno. + * Note that all DMA addresses should be created via the + * struct ib_dma_mapping_ops functions (see qib_dma.c). + */ +struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct qib_ibdev *dev = to_idev(pd->device); + struct qib_mr *mr; + struct ib_mr *ret; + unsigned long flags; + + if (to_ipd(pd)->user) { + ret = ERR_PTR(-EPERM); + goto bail; + } + + mr = kzalloc(sizeof *mr, GFP_KERNEL); + if (!mr) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + mr->mr.access_flags = acc; + atomic_set(&mr->mr.refcount, 0); + + spin_lock_irqsave(&dev->lk_table.lock, flags); + if (!dev->dma_mr) + dev->dma_mr = &mr->mr; + spin_unlock_irqrestore(&dev->lk_table.lock, flags); + + ret = &mr->ibmr; + +bail: + return ret; +} + +static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table) +{ + struct qib_mr *mr; + int m, i = 0; + + /* Allocate struct plus pointers to first level page tables. */ + m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; + mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); + if (!mr) + goto done; + + /* Allocate first level page tables. */ + for (; i < m; i++) { + mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL); + if (!mr->mr.map[i]) + goto bail; + } + mr->mr.mapsz = m; + mr->mr.page_shift = 0; + mr->mr.max_segs = count; + + /* + * ib_reg_phys_mr() will initialize mr->ibmr except for + * lkey and rkey. + */ + if (!qib_alloc_lkey(lk_table, &mr->mr)) + goto bail; + mr->ibmr.lkey = mr->mr.lkey; + mr->ibmr.rkey = mr->mr.lkey; + + atomic_set(&mr->mr.refcount, 0); + goto done; + +bail: + while (i) + kfree(mr->mr.map[--i]); + kfree(mr); + mr = NULL; + +done: + return mr; +} + +/** + * qib_reg_phys_mr - register a physical memory region + * @pd: protection domain for this memory region + * @buffer_list: pointer to the list of physical buffers to register + * @num_phys_buf: the number of physical buffers to register + * @iova_start: the starting address passed over IB which maps to this MR + * + * Returns the memory region on success, otherwise returns an errno. + */ +struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, int acc, u64 *iova_start) +{ + struct qib_mr *mr; + int n, m, i; + struct ib_mr *ret; + + mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table); + if (mr == NULL) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + mr->mr.pd = pd; + mr->mr.user_base = *iova_start; + mr->mr.iova = *iova_start; + mr->mr.length = 0; + mr->mr.offset = 0; + mr->mr.access_flags = acc; + mr->umem = NULL; + + m = 0; + n = 0; + for (i = 0; i < num_phys_buf; i++) { + mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr; + mr->mr.map[m]->segs[n].length = buffer_list[i].size; + mr->mr.length += buffer_list[i].size; + n++; + if (n == QIB_SEGSZ) { + m++; + n = 0; + } + } + + ret = &mr->ibmr; + +bail: + return ret; +} + +/** + * qib_reg_user_mr - register a userspace memory region + * @pd: protection domain for this memory region + * @start: starting userspace address + * @length: length of region to register + * @virt_addr: virtual address to use (from HCA's point of view) + * @mr_access_flags: access flags for this memory region + * @udata: unused by the QLogic_IB driver + * + * Returns the memory region on success, otherwise returns an errno. + */ +struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, + struct ib_udata *udata) +{ + struct qib_mr *mr; + struct ib_umem *umem; + struct ib_umem_chunk *chunk; + int n, m, i; + struct ib_mr *ret; + + if (length == 0) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + + umem = ib_umem_get(pd->uobject->context, start, length, + mr_access_flags, 0); + if (IS_ERR(umem)) + return (void *) umem; + + n = 0; + list_for_each_entry(chunk, &umem->chunk_list, list) + n += chunk->nents; + + mr = alloc_mr(n, &to_idev(pd->device)->lk_table); + if (!mr) { + ret = ERR_PTR(-ENOMEM); + ib_umem_release(umem); + goto bail; + } + + mr->mr.pd = pd; + mr->mr.user_base = start; + mr->mr.iova = virt_addr; + mr->mr.length = length; + mr->mr.offset = umem->offset; + mr->mr.access_flags = mr_access_flags; + mr->umem = umem; + + if (is_power_of_2(umem->page_size)) + mr->mr.page_shift = ilog2(umem->page_size); + m = 0; + n = 0; + list_for_each_entry(chunk, &umem->chunk_list, list) { + for (i = 0; i < chunk->nents; i++) { + void *vaddr; + + vaddr = page_address(sg_page(&chunk->page_list[i])); + if (!vaddr) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + mr->mr.map[m]->segs[n].vaddr = vaddr; + mr->mr.map[m]->segs[n].length = umem->page_size; + n++; + if (n == QIB_SEGSZ) { + m++; + n = 0; + } + } + } + ret = &mr->ibmr; + +bail: + return ret; +} + +/** + * qib_dereg_mr - unregister and free a memory region + * @ibmr: the memory region to free + * + * Returns 0 on success. + * + * Note that this is called to free MRs created by qib_get_dma_mr() + * or qib_reg_user_mr(). + */ +int qib_dereg_mr(struct ib_mr *ibmr) +{ + struct qib_mr *mr = to_imr(ibmr); + struct qib_ibdev *dev = to_idev(ibmr->device); + int ret; + int i; + + ret = qib_free_lkey(dev, &mr->mr); + if (ret) + return ret; + + i = mr->mr.mapsz; + while (i) + kfree(mr->mr.map[--i]); + if (mr->umem) + ib_umem_release(mr->umem); + kfree(mr); + return 0; +} + +/* + * Allocate a memory region usable with the + * IB_WR_FAST_REG_MR send work request. + * + * Return the memory region on success, otherwise return an errno. + */ +struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) +{ + struct qib_mr *mr; + + mr = alloc_mr(max_page_list_len, &to_idev(pd->device)->lk_table); + if (mr == NULL) + return ERR_PTR(-ENOMEM); + + mr->mr.pd = pd; + mr->mr.user_base = 0; + mr->mr.iova = 0; + mr->mr.length = 0; + mr->mr.offset = 0; + mr->mr.access_flags = 0; + mr->umem = NULL; + + return &mr->ibmr; +} + +struct ib_fast_reg_page_list * +qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len) +{ + unsigned size = page_list_len * sizeof(u64); + struct ib_fast_reg_page_list *pl; + + if (size > PAGE_SIZE) + return ERR_PTR(-EINVAL); + + pl = kmalloc(sizeof *pl, GFP_KERNEL); + if (!pl) + return ERR_PTR(-ENOMEM); + + pl->page_list = kmalloc(size, GFP_KERNEL); + if (!pl->page_list) + goto err_free; + + return pl; + +err_free: + kfree(pl); + return ERR_PTR(-ENOMEM); +} + +void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl) +{ + kfree(pl->page_list); + kfree(pl); +} + +/** + * qib_alloc_fmr - allocate a fast memory region + * @pd: the protection domain for this memory region + * @mr_access_flags: access flags for this memory region + * @fmr_attr: fast memory region attributes + * + * Returns the memory region on success, otherwise returns an errno. + */ +struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, + struct ib_fmr_attr *fmr_attr) +{ + struct qib_fmr *fmr; + int m, i = 0; + struct ib_fmr *ret; + + /* Allocate struct plus pointers to first level page tables. */ + m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ; + fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); + if (!fmr) + goto bail; + + /* Allocate first level page tables. */ + for (; i < m; i++) { + fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0], + GFP_KERNEL); + if (!fmr->mr.map[i]) + goto bail; + } + fmr->mr.mapsz = m; + + /* + * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & + * rkey. + */ + if (!qib_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr)) + goto bail; + fmr->ibfmr.rkey = fmr->mr.lkey; + fmr->ibfmr.lkey = fmr->mr.lkey; + /* + * Resources are allocated but no valid mapping (RKEY can't be + * used). + */ + fmr->mr.pd = pd; + fmr->mr.user_base = 0; + fmr->mr.iova = 0; + fmr->mr.length = 0; + fmr->mr.offset = 0; + fmr->mr.access_flags = mr_access_flags; + fmr->mr.max_segs = fmr_attr->max_pages; + fmr->mr.page_shift = fmr_attr->page_shift; + + atomic_set(&fmr->mr.refcount, 0); + ret = &fmr->ibfmr; + goto done; + +bail: + while (i) + kfree(fmr->mr.map[--i]); + kfree(fmr); + ret = ERR_PTR(-ENOMEM); + +done: + return ret; +} + +/** + * qib_map_phys_fmr - set up a fast memory region + * @ibmfr: the fast memory region to set up + * @page_list: the list of pages to associate with the fast memory region + * @list_len: the number of pages to associate with the fast memory region + * @iova: the virtual address of the start of the fast memory region + * + * This may be called from interrupt context. + */ + +int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int list_len, u64 iova) +{ + struct qib_fmr *fmr = to_ifmr(ibfmr); + struct qib_lkey_table *rkt; + unsigned long flags; + int m, n, i; + u32 ps; + int ret; + + if (atomic_read(&fmr->mr.refcount)) + return -EBUSY; + + if (list_len > fmr->mr.max_segs) { + ret = -EINVAL; + goto bail; + } + rkt = &to_idev(ibfmr->device)->lk_table; + spin_lock_irqsave(&rkt->lock, flags); + fmr->mr.user_base = iova; + fmr->mr.iova = iova; + ps = 1 << fmr->mr.page_shift; + fmr->mr.length = list_len * ps; + m = 0; + n = 0; + for (i = 0; i < list_len; i++) { + fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i]; + fmr->mr.map[m]->segs[n].length = ps; + if (++n == QIB_SEGSZ) { + m++; + n = 0; + } + } + spin_unlock_irqrestore(&rkt->lock, flags); + ret = 0; + +bail: + return ret; +} + +/** + * qib_unmap_fmr - unmap fast memory regions + * @fmr_list: the list of fast memory regions to unmap + * + * Returns 0 on success. + */ +int qib_unmap_fmr(struct list_head *fmr_list) +{ + struct qib_fmr *fmr; + struct qib_lkey_table *rkt; + unsigned long flags; + + list_for_each_entry(fmr, fmr_list, ibfmr.list) { + rkt = &to_idev(fmr->ibfmr.device)->lk_table; + spin_lock_irqsave(&rkt->lock, flags); + fmr->mr.user_base = 0; + fmr->mr.iova = 0; + fmr->mr.length = 0; + spin_unlock_irqrestore(&rkt->lock, flags); + } + return 0; +} + +/** + * qib_dealloc_fmr - deallocate a fast memory region + * @ibfmr: the fast memory region to deallocate + * + * Returns 0 on success. + */ +int qib_dealloc_fmr(struct ib_fmr *ibfmr) +{ + struct qib_fmr *fmr = to_ifmr(ibfmr); + int ret; + int i; + + ret = qib_free_lkey(to_idev(ibfmr->device), &fmr->mr); + if (ret) + return ret; + + i = fmr->mr.mapsz; + while (i) + kfree(fmr->mr.map[--i]); + kfree(fmr); + return 0; +} diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c new file mode 100644 index 00000000..790646ef --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -0,0 +1,759 @@ +/* + * Copyright (c) 2008, 2009 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "qib.h" + +/* + * This file contains PCIe utility routines that are common to the + * various QLogic InfiniPath adapters + */ + +/* + * Code to adjust PCIe capabilities. + * To minimize the change footprint, we call it + * from qib_pcie_params, which every chip-specific + * file calls, even though this violates some + * expectations of harmlessness. + */ +static int qib_tune_pcie_caps(struct qib_devdata *); +static int qib_tune_pcie_coalesce(struct qib_devdata *); + +/* + * Do all the common PCIe setup and initialization. + * devdata is not yet allocated, and is not allocated until after this + * routine returns success. Therefore qib_dev_err() can't be used for error + * printing. + */ +int qib_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + int ret; + + ret = pci_enable_device(pdev); + if (ret) { + /* + * This can happen (in theory) iff: + * We did a chip reset, and then failed to reprogram the + * BAR, or the chip reset due to an internal error. We then + * unloaded the driver and reloaded it. + * + * Both reset cases set the BAR back to initial state. For + * the latter case, the AER sticky error bit at offset 0x718 + * should be set, but the Linux kernel doesn't yet know + * about that, it appears. If the original BAR was retained + * in the kernel data structures, this may be OK. + */ + qib_early_err(&pdev->dev, "pci enable failed: error %d\n", + -ret); + goto done; + } + + ret = pci_request_regions(pdev, QIB_DRV_NAME); + if (ret) { + qib_devinfo(pdev, "pci_request_regions fails: err %d\n", -ret); + goto bail; + } + + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (ret) { + /* + * If the 64 bit setup fails, try 32 bit. Some systems + * do not setup 64 bit maps on systems with 2GB or less + * memory installed. + */ + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (ret) { + qib_devinfo(pdev, "Unable to set DMA mask: %d\n", ret); + goto bail; + } + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + } else + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (ret) { + qib_early_err(&pdev->dev, + "Unable to set DMA consistent mask: %d\n", ret); + goto bail; + } + + pci_set_master(pdev); + ret = pci_enable_pcie_error_reporting(pdev); + if (ret) { + qib_early_err(&pdev->dev, + "Unable to enable pcie error reporting: %d\n", + ret); + ret = 0; + } + goto done; + +bail: + pci_disable_device(pdev); + pci_release_regions(pdev); +done: + return ret; +} + +/* + * Do remaining PCIe setup, once dd is allocated, and save away + * fields required to re-initialize after a chip reset, or for + * various other purposes + */ +int qib_pcie_ddinit(struct qib_devdata *dd, struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + unsigned long len; + resource_size_t addr; + + dd->pcidev = pdev; + pci_set_drvdata(pdev, dd); + + addr = pci_resource_start(pdev, 0); + len = pci_resource_len(pdev, 0); + +#if defined(__powerpc__) + /* There isn't a generic way to specify writethrough mappings */ + dd->kregbase = __ioremap(addr, len, _PAGE_NO_CACHE | _PAGE_WRITETHRU); +#else + dd->kregbase = ioremap_nocache(addr, len); +#endif + + if (!dd->kregbase) + return -ENOMEM; + + dd->kregend = (u64 __iomem *)((void __iomem *) dd->kregbase + len); + dd->physaddr = addr; /* used for io_remap, etc. */ + + /* + * Save BARs to rewrite after device reset. Save all 64 bits of + * BAR, just in case. + */ + dd->pcibar0 = addr; + dd->pcibar1 = addr >> 32; + dd->deviceid = ent->device; /* save for later use */ + dd->vendorid = ent->vendor; + + return 0; +} + +/* + * Do PCIe cleanup, after chip-specific cleanup, etc. Just prior + * to releasing the dd memory. + * void because none of the core pcie cleanup returns are void + */ +void qib_pcie_ddcleanup(struct qib_devdata *dd) +{ + u64 __iomem *base = (void __iomem *) dd->kregbase; + + dd->kregbase = NULL; + iounmap(base); + if (dd->piobase) + iounmap(dd->piobase); + if (dd->userbase) + iounmap(dd->userbase); + if (dd->piovl15base) + iounmap(dd->piovl15base); + + pci_disable_device(dd->pcidev); + pci_release_regions(dd->pcidev); + + pci_set_drvdata(dd->pcidev, NULL); +} + +static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt, + struct qib_msix_entry *qib_msix_entry) +{ + int ret; + u32 tabsize = 0; + u16 msix_flags; + struct msix_entry *msix_entry; + int i; + + /* We can't pass qib_msix_entry array to qib_msix_setup + * so use a dummy msix_entry array and copy the allocated + * irq back to the qib_msix_entry array. */ + msix_entry = kmalloc(*msixcnt * sizeof(*msix_entry), GFP_KERNEL); + if (!msix_entry) { + ret = -ENOMEM; + goto do_intx; + } + for (i = 0; i < *msixcnt; i++) + msix_entry[i] = qib_msix_entry[i].msix; + + pci_read_config_word(dd->pcidev, pos + PCI_MSIX_FLAGS, &msix_flags); + tabsize = 1 + (msix_flags & PCI_MSIX_FLAGS_QSIZE); + if (tabsize > *msixcnt) + tabsize = *msixcnt; + ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize); + if (ret > 0) { + tabsize = ret; + ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize); + } +do_intx: + if (ret) { + qib_dev_err(dd, "pci_enable_msix %d vectors failed: %d, " + "falling back to INTx\n", tabsize, ret); + tabsize = 0; + } + for (i = 0; i < tabsize; i++) + qib_msix_entry[i].msix = msix_entry[i]; + kfree(msix_entry); + *msixcnt = tabsize; + + if (ret) + qib_enable_intx(dd->pcidev); + +} + +/** + * We save the msi lo and hi values, so we can restore them after + * chip reset (the kernel PCI infrastructure doesn't yet handle that + * correctly. + */ +static int qib_msi_setup(struct qib_devdata *dd, int pos) +{ + struct pci_dev *pdev = dd->pcidev; + u16 control; + int ret; + + ret = pci_enable_msi(pdev); + if (ret) + qib_dev_err(dd, "pci_enable_msi failed: %d, " + "interrupts may not work\n", ret); + /* continue even if it fails, we may still be OK... */ + + pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, + &dd->msi_lo); + pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI, + &dd->msi_hi); + pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &control); + /* now save the data (vector) info */ + pci_read_config_word(pdev, pos + ((control & PCI_MSI_FLAGS_64BIT) + ? 12 : 8), + &dd->msi_data); + return ret; +} + +int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, + struct qib_msix_entry *entry) +{ + u16 linkstat, speed; + int pos = 0, pose, ret = 1; + + pose = pci_pcie_cap(dd->pcidev); + if (!pose) { + qib_dev_err(dd, "Can't find PCI Express capability!\n"); + /* set up something... */ + dd->lbus_width = 1; + dd->lbus_speed = 2500; /* Gen1, 2.5GHz */ + goto bail; + } + + pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSIX); + if (nent && *nent && pos) { + qib_msix_setup(dd, pos, nent, entry); + ret = 0; /* did it, either MSIx or INTx */ + } else { + pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI); + if (pos) + ret = qib_msi_setup(dd, pos); + else + qib_dev_err(dd, "No PCI MSI or MSIx capability!\n"); + } + if (!pos) + qib_enable_intx(dd->pcidev); + + pci_read_config_word(dd->pcidev, pose + PCI_EXP_LNKSTA, &linkstat); + /* + * speed is bits 0-3, linkwidth is bits 4-8 + * no defines for them in headers + */ + speed = linkstat & 0xf; + linkstat >>= 4; + linkstat &= 0x1f; + dd->lbus_width = linkstat; + + switch (speed) { + case 1: + dd->lbus_speed = 2500; /* Gen1, 2.5GHz */ + break; + case 2: + dd->lbus_speed = 5000; /* Gen1, 5GHz */ + break; + default: /* not defined, assume gen1 */ + dd->lbus_speed = 2500; + break; + } + + /* + * Check against expected pcie width and complain if "wrong" + * on first initialization, not afterwards (i.e., reset). + */ + if (minw && linkstat < minw) + qib_dev_err(dd, + "PCIe width %u (x%u HCA), performance reduced\n", + linkstat, minw); + + qib_tune_pcie_caps(dd); + + qib_tune_pcie_coalesce(dd); + +bail: + /* fill in string, even on errors */ + snprintf(dd->lbus_info, sizeof(dd->lbus_info), + "PCIe,%uMHz,x%u\n", dd->lbus_speed, dd->lbus_width); + return ret; +} + +/* + * Setup pcie interrupt stuff again after a reset. I'd like to just call + * pci_enable_msi() again for msi, but when I do that, + * the MSI enable bit doesn't get set in the command word, and + * we switch to to a different interrupt vector, which is confusing, + * so I instead just do it all inline. Perhaps somehow can tie this + * into the PCIe hotplug support at some point + */ +int qib_reinit_intr(struct qib_devdata *dd) +{ + int pos; + u16 control; + int ret = 0; + + /* If we aren't using MSI, don't restore it */ + if (!dd->msi_lo) + goto bail; + + pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI); + if (!pos) { + qib_dev_err(dd, "Can't find MSI capability, " + "can't restore MSI settings\n"); + ret = 0; + /* nothing special for MSIx, just MSI */ + goto bail; + } + pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO, + dd->msi_lo); + pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI, + dd->msi_hi); + pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control); + if (!(control & PCI_MSI_FLAGS_ENABLE)) { + control |= PCI_MSI_FLAGS_ENABLE; + pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, + control); + } + /* now rewrite the data (vector) info */ + pci_write_config_word(dd->pcidev, pos + + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8), + dd->msi_data); + ret = 1; +bail: + if (!ret && (dd->flags & QIB_HAS_INTX)) { + qib_enable_intx(dd->pcidev); + ret = 1; + } + + /* and now set the pci master bit again */ + pci_set_master(dd->pcidev); + + return ret; +} + +/* + * Disable msi interrupt if enabled, and clear msi_lo. + * This is used primarily for the fallback to INTx, but + * is also used in reinit after reset, and during cleanup. + */ +void qib_nomsi(struct qib_devdata *dd) +{ + dd->msi_lo = 0; + pci_disable_msi(dd->pcidev); +} + +/* + * Same as qib_nosmi, but for MSIx. + */ +void qib_nomsix(struct qib_devdata *dd) +{ + pci_disable_msix(dd->pcidev); +} + +/* + * Similar to pci_intx(pdev, 1), except that we make sure + * msi(x) is off. + */ +void qib_enable_intx(struct pci_dev *pdev) +{ + u16 cw, new; + int pos; + + /* first, turn on INTx */ + pci_read_config_word(pdev, PCI_COMMAND, &cw); + new = cw & ~PCI_COMMAND_INTX_DISABLE; + if (new != cw) + pci_write_config_word(pdev, PCI_COMMAND, new); + + pos = pci_find_capability(pdev, PCI_CAP_ID_MSI); + if (pos) { + /* then turn off MSI */ + pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw); + new = cw & ~PCI_MSI_FLAGS_ENABLE; + if (new != cw) + pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new); + } + pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX); + if (pos) { + /* then turn off MSIx */ + pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &cw); + new = cw & ~PCI_MSIX_FLAGS_ENABLE; + if (new != cw) + pci_write_config_word(pdev, pos + PCI_MSIX_FLAGS, new); + } +} + +/* + * These two routines are helper routines for the device reset code + * to move all the pcie code out of the chip-specific driver code. + */ +void qib_pcie_getcmd(struct qib_devdata *dd, u16 *cmd, u8 *iline, u8 *cline) +{ + pci_read_config_word(dd->pcidev, PCI_COMMAND, cmd); + pci_read_config_byte(dd->pcidev, PCI_INTERRUPT_LINE, iline); + pci_read_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline); +} + +void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline) +{ + int r; + r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, + dd->pcibar0); + if (r) + qib_dev_err(dd, "rewrite of BAR0 failed: %d\n", r); + r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, + dd->pcibar1); + if (r) + qib_dev_err(dd, "rewrite of BAR1 failed: %d\n", r); + /* now re-enable memory access, and restore cosmetic settings */ + pci_write_config_word(dd->pcidev, PCI_COMMAND, cmd); + pci_write_config_byte(dd->pcidev, PCI_INTERRUPT_LINE, iline); + pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline); + r = pci_enable_device(dd->pcidev); + if (r) + qib_dev_err(dd, "pci_enable_device failed after " + "reset: %d\n", r); +} + +/* code to adjust PCIe capabilities. */ + +static int fld2val(int wd, int mask) +{ + int lsbmask; + + if (!mask) + return 0; + wd &= mask; + lsbmask = mask ^ (mask & (mask - 1)); + wd /= lsbmask; + return wd; +} + +static int val2fld(int wd, int mask) +{ + int lsbmask; + + if (!mask) + return 0; + lsbmask = mask ^ (mask & (mask - 1)); + wd *= lsbmask; + return wd; +} + +static int qib_pcie_coalesce; +module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO); +MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets"); + +/* + * Enable PCIe completion and data coalescing, on Intel 5x00 and 7300 + * chipsets. This is known to be unsafe for some revisions of some + * of these chipsets, with some BIOS settings, and enabling it on those + * systems may result in the system crashing, and/or data corruption. + */ +static int qib_tune_pcie_coalesce(struct qib_devdata *dd) +{ + int r; + struct pci_dev *parent; + int ppos; + u16 devid; + u32 mask, bits, val; + + if (!qib_pcie_coalesce) + return 0; + + /* Find out supported and configured values for parent (root) */ + parent = dd->pcidev->bus->self; + if (parent->bus->parent) { + qib_devinfo(dd->pcidev, "Parent not root\n"); + return 1; + } + ppos = pci_pcie_cap(parent); + if (!ppos) + return 1; + if (parent->vendor != 0x8086) + return 1; + + /* + * - bit 12: Max_rdcmp_Imt_EN: need to set to 1 + * - bit 11: COALESCE_FORCE: need to set to 0 + * - bit 10: COALESCE_EN: need to set to 1 + * (but limitations on some on some chipsets) + * + * On the Intel 5000, 5100, and 7300 chipsets, there is + * also: - bit 25:24: COALESCE_MODE, need to set to 0 + */ + devid = parent->device; + if (devid >= 0x25e2 && devid <= 0x25fa) { + /* 5000 P/V/X/Z */ + if (parent->revision <= 0xb2) + bits = 1U << 10; + else + bits = 7U << 10; + mask = (3U << 24) | (7U << 10); + } else if (devid >= 0x65e2 && devid <= 0x65fa) { + /* 5100 */ + bits = 1U << 10; + mask = (3U << 24) | (7U << 10); + } else if (devid >= 0x4021 && devid <= 0x402e) { + /* 5400 */ + bits = 7U << 10; + mask = 7U << 10; + } else if (devid >= 0x3604 && devid <= 0x360a) { + /* 7300 */ + bits = 7U << 10; + mask = (3U << 24) | (7U << 10); + } else { + /* not one of the chipsets that we know about */ + return 1; + } + pci_read_config_dword(parent, 0x48, &val); + val &= ~mask; + val |= bits; + r = pci_write_config_dword(parent, 0x48, val); + return 0; +} + +/* + * BIOS may not set PCIe bus-utilization parameters for best performance. + * Check and optionally adjust them to maximize our throughput. + */ +static int qib_pcie_caps; +module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO); +MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); + +static int qib_tune_pcie_caps(struct qib_devdata *dd) +{ + int ret = 1; /* Assume the worst */ + struct pci_dev *parent; + int ppos, epos; + u16 pcaps, pctl, ecaps, ectl; + int rc_sup, ep_sup; + int rc_cur, ep_cur; + + /* Find out supported and configured values for parent (root) */ + parent = dd->pcidev->bus->self; + if (parent->bus->parent) { + qib_devinfo(dd->pcidev, "Parent not root\n"); + goto bail; + } + ppos = pci_pcie_cap(parent); + if (ppos) { + pci_read_config_word(parent, ppos + PCI_EXP_DEVCAP, &pcaps); + pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl); + } else + goto bail; + /* Find out supported and configured values for endpoint (us) */ + epos = pci_pcie_cap(dd->pcidev); + if (epos) { + pci_read_config_word(dd->pcidev, epos + PCI_EXP_DEVCAP, &ecaps); + pci_read_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, &ectl); + } else + goto bail; + ret = 0; + /* Find max payload supported by root, endpoint */ + rc_sup = fld2val(pcaps, PCI_EXP_DEVCAP_PAYLOAD); + ep_sup = fld2val(ecaps, PCI_EXP_DEVCAP_PAYLOAD); + if (rc_sup > ep_sup) + rc_sup = ep_sup; + + rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_PAYLOAD); + ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_PAYLOAD); + + /* If Supported greater than limit in module param, limit it */ + if (rc_sup > (qib_pcie_caps & 7)) + rc_sup = qib_pcie_caps & 7; + /* If less than (allowed, supported), bump root payload */ + if (rc_sup > rc_cur) { + rc_cur = rc_sup; + pctl = (pctl & ~PCI_EXP_DEVCTL_PAYLOAD) | + val2fld(rc_cur, PCI_EXP_DEVCTL_PAYLOAD); + pci_write_config_word(parent, ppos + PCI_EXP_DEVCTL, pctl); + } + /* If less than (allowed, supported), bump endpoint payload */ + if (rc_sup > ep_cur) { + ep_cur = rc_sup; + ectl = (ectl & ~PCI_EXP_DEVCTL_PAYLOAD) | + val2fld(ep_cur, PCI_EXP_DEVCTL_PAYLOAD); + pci_write_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, ectl); + } + + /* + * Now the Read Request size. + * No field for max supported, but PCIe spec limits it to 4096, + * which is code '5' (log2(4096) - 7) + */ + rc_sup = 5; + if (rc_sup > ((qib_pcie_caps >> 4) & 7)) + rc_sup = (qib_pcie_caps >> 4) & 7; + rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_READRQ); + ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_READRQ); + + if (rc_sup > rc_cur) { + rc_cur = rc_sup; + pctl = (pctl & ~PCI_EXP_DEVCTL_READRQ) | + val2fld(rc_cur, PCI_EXP_DEVCTL_READRQ); + pci_write_config_word(parent, ppos + PCI_EXP_DEVCTL, pctl); + } + if (rc_sup > ep_cur) { + ep_cur = rc_sup; + ectl = (ectl & ~PCI_EXP_DEVCTL_READRQ) | + val2fld(ep_cur, PCI_EXP_DEVCTL_READRQ); + pci_write_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, ectl); + } +bail: + return ret; +} +/* End of PCIe capability tuning */ + +/* + * From here through qib_pci_err_handler definition is invoked via + * PCI error infrastructure, registered via pci + */ +static pci_ers_result_t +qib_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) +{ + struct qib_devdata *dd = pci_get_drvdata(pdev); + pci_ers_result_t ret = PCI_ERS_RESULT_RECOVERED; + + switch (state) { + case pci_channel_io_normal: + qib_devinfo(pdev, "State Normal, ignoring\n"); + break; + + case pci_channel_io_frozen: + qib_devinfo(pdev, "State Frozen, requesting reset\n"); + pci_disable_device(pdev); + ret = PCI_ERS_RESULT_NEED_RESET; + break; + + case pci_channel_io_perm_failure: + qib_devinfo(pdev, "State Permanent Failure, disabling\n"); + if (dd) { + /* no more register accesses! */ + dd->flags &= ~QIB_PRESENT; + qib_disable_after_error(dd); + } + /* else early, or other problem */ + ret = PCI_ERS_RESULT_DISCONNECT; + break; + + default: /* shouldn't happen */ + qib_devinfo(pdev, "QIB PCI errors detected (state %d)\n", + state); + break; + } + return ret; +} + +static pci_ers_result_t +qib_pci_mmio_enabled(struct pci_dev *pdev) +{ + u64 words = 0U; + struct qib_devdata *dd = pci_get_drvdata(pdev); + pci_ers_result_t ret = PCI_ERS_RESULT_RECOVERED; + + if (dd && dd->pport) { + words = dd->f_portcntr(dd->pport, QIBPORTCNTR_WORDRCV); + if (words == ~0ULL) + ret = PCI_ERS_RESULT_NEED_RESET; + } + qib_devinfo(pdev, "QIB mmio_enabled function called, " + "read wordscntr %Lx, returning %d\n", words, ret); + return ret; +} + +static pci_ers_result_t +qib_pci_slot_reset(struct pci_dev *pdev) +{ + qib_devinfo(pdev, "QIB link_reset function called, ignored\n"); + return PCI_ERS_RESULT_CAN_RECOVER; +} + +static pci_ers_result_t +qib_pci_link_reset(struct pci_dev *pdev) +{ + qib_devinfo(pdev, "QIB link_reset function called, ignored\n"); + return PCI_ERS_RESULT_CAN_RECOVER; +} + +static void +qib_pci_resume(struct pci_dev *pdev) +{ + struct qib_devdata *dd = pci_get_drvdata(pdev); + qib_devinfo(pdev, "QIB resume function called\n"); + pci_cleanup_aer_uncorrect_error_status(pdev); + /* + * Running jobs will fail, since it's asynchronous + * unlike sysfs-requested reset. Better than + * doing nothing. + */ + qib_init(dd, 1); /* same as re-init after reset */ +} + +struct pci_error_handlers qib_pci_err_handler = { + .error_detected = qib_pci_error_detected, + .mmio_enabled = qib_pci_mmio_enabled, + .link_reset = qib_pci_link_reset, + .slot_reset = qib_pci_slot_reset, + .resume = qib_pci_resume, +}; diff --git a/drivers/infiniband/hw/qib/qib_pio_copy.c b/drivers/infiniband/hw/qib/qib_pio_copy.c new file mode 100644 index 00000000..10b8c444 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_pio_copy.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2009 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "qib.h" + +/** + * qib_pio_copy - copy data to MMIO space, in multiples of 32-bits + * @to: destination, in MMIO space (must be 64-bit aligned) + * @from: source (must be 64-bit aligned) + * @count: number of 32-bit quantities to copy + * + * Copy data from kernel space to MMIO space, in multiples of 32 bits at a + * time. Order of access is not guaranteed, nor is a memory barrier + * performed afterwards. + */ +void qib_pio_copy(void __iomem *to, const void *from, size_t count) +{ +#ifdef CONFIG_64BIT + u64 __iomem *dst = to; + const u64 *src = from; + const u64 *end = src + (count >> 1); + + while (src < end) + __raw_writeq(*src++, dst++); + if (count & 1) + __raw_writel(*(const u32 *)src, dst); +#else + u32 __iomem *dst = to; + const u32 *src = from; + const u32 *end = src + count; + + while (src < end) + __raw_writel(*src++, dst++); +#endif +} diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c new file mode 100644 index 00000000..7e7e16fb --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -0,0 +1,1281 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "qib.h" + +#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) +#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) + +static inline unsigned mk_qpn(struct qib_qpn_table *qpt, + struct qpn_map *map, unsigned off) +{ + return (map - qpt->map) * BITS_PER_PAGE + off; +} + +static inline unsigned find_next_offset(struct qib_qpn_table *qpt, + struct qpn_map *map, unsigned off, + unsigned n) +{ + if (qpt->mask) { + off++; + if (((off & qpt->mask) >> 1) >= n) + off = (off | qpt->mask) + 2; + } else + off = find_next_zero_bit(map->page, BITS_PER_PAGE, off); + return off; +} + +/* + * Convert the AETH credit code into the number of credits. + */ +static u32 credit_table[31] = { + 0, /* 0 */ + 1, /* 1 */ + 2, /* 2 */ + 3, /* 3 */ + 4, /* 4 */ + 6, /* 5 */ + 8, /* 6 */ + 12, /* 7 */ + 16, /* 8 */ + 24, /* 9 */ + 32, /* A */ + 48, /* B */ + 64, /* C */ + 96, /* D */ + 128, /* E */ + 192, /* F */ + 256, /* 10 */ + 384, /* 11 */ + 512, /* 12 */ + 768, /* 13 */ + 1024, /* 14 */ + 1536, /* 15 */ + 2048, /* 16 */ + 3072, /* 17 */ + 4096, /* 18 */ + 6144, /* 19 */ + 8192, /* 1A */ + 12288, /* 1B */ + 16384, /* 1C */ + 24576, /* 1D */ + 32768 /* 1E */ +}; + +static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map) +{ + unsigned long page = get_zeroed_page(GFP_KERNEL); + + /* + * Free the page if someone raced with us installing it. + */ + + spin_lock(&qpt->lock); + if (map->page) + free_page(page); + else + map->page = (void *)page; + spin_unlock(&qpt->lock); +} + +/* + * Allocate the next available QPN or + * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. + */ +static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, + enum ib_qp_type type, u8 port) +{ + u32 i, offset, max_scan, qpn; + struct qpn_map *map; + u32 ret; + + if (type == IB_QPT_SMI || type == IB_QPT_GSI) { + unsigned n; + + ret = type == IB_QPT_GSI; + n = 1 << (ret + 2 * (port - 1)); + spin_lock(&qpt->lock); + if (qpt->flags & n) + ret = -EINVAL; + else + qpt->flags |= n; + spin_unlock(&qpt->lock); + goto bail; + } + + qpn = qpt->last + 2; + if (qpn >= QPN_MAX) + qpn = 2; + if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues) + qpn = (qpn | qpt->mask) + 2; + offset = qpn & BITS_PER_PAGE_MASK; + map = &qpt->map[qpn / BITS_PER_PAGE]; + max_scan = qpt->nmaps - !offset; + for (i = 0;;) { + if (unlikely(!map->page)) { + get_map_page(qpt, map); + if (unlikely(!map->page)) + break; + } + do { + if (!test_and_set_bit(offset, map->page)) { + qpt->last = qpn; + ret = qpn; + goto bail; + } + offset = find_next_offset(qpt, map, offset, + dd->n_krcv_queues); + qpn = mk_qpn(qpt, map, offset); + /* + * This test differs from alloc_pidmap(). + * If find_next_offset() does find a zero + * bit, we don't need to check for QPN + * wrapping around past our starting QPN. + * We just need to be sure we don't loop + * forever. + */ + } while (offset < BITS_PER_PAGE && qpn < QPN_MAX); + /* + * In order to keep the number of pages allocated to a + * minimum, we scan the all existing pages before increasing + * the size of the bitmap table. + */ + if (++i > max_scan) { + if (qpt->nmaps == QPNMAP_ENTRIES) + break; + map = &qpt->map[qpt->nmaps++]; + offset = 0; + } else if (map < &qpt->map[qpt->nmaps]) { + ++map; + offset = 0; + } else { + map = &qpt->map[0]; + offset = 2; + } + qpn = mk_qpn(qpt, map, offset); + } + + ret = -ENOMEM; + +bail: + return ret; +} + +static void free_qpn(struct qib_qpn_table *qpt, u32 qpn) +{ + struct qpn_map *map; + + map = qpt->map + qpn / BITS_PER_PAGE; + if (map->page) + clear_bit(qpn & BITS_PER_PAGE_MASK, map->page); +} + +static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn) +{ + return jhash_1word(qpn, dev->qp_rnd) & + (dev->qp_table_size - 1); +} + + +/* + * Put the QP into the hash table. + * The hash table holds a reference to the QP. + */ +static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) +{ + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + unsigned long flags; + unsigned n = qpn_hash(dev, qp->ibqp.qp_num); + + spin_lock_irqsave(&dev->qpt_lock, flags); + atomic_inc(&qp->refcount); + + if (qp->ibqp.qp_num == 0) + rcu_assign_pointer(ibp->qp0, qp); + else if (qp->ibqp.qp_num == 1) + rcu_assign_pointer(ibp->qp1, qp); + else { + qp->next = dev->qp_table[n]; + rcu_assign_pointer(dev->qp_table[n], qp); + } + + spin_unlock_irqrestore(&dev->qpt_lock, flags); + synchronize_rcu(); +} + +/* + * Remove the QP from the table so it can't be found asynchronously by + * the receive interrupt routine. + */ +static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) +{ + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + unsigned n = qpn_hash(dev, qp->ibqp.qp_num); + unsigned long flags; + + spin_lock_irqsave(&dev->qpt_lock, flags); + + if (ibp->qp0 == qp) { + atomic_dec(&qp->refcount); + rcu_assign_pointer(ibp->qp0, NULL); + } else if (ibp->qp1 == qp) { + atomic_dec(&qp->refcount); + rcu_assign_pointer(ibp->qp1, NULL); + } else { + struct qib_qp *q, **qpp; + + qpp = &dev->qp_table[n]; + for (; (q = *qpp) != NULL; qpp = &q->next) + if (q == qp) { + atomic_dec(&qp->refcount); + rcu_assign_pointer(*qpp, qp->next); + qp->next = NULL; + break; + } + } + + spin_unlock_irqrestore(&dev->qpt_lock, flags); + synchronize_rcu(); +} + +/** + * qib_free_all_qps - check for QPs still in use + * @qpt: the QP table to empty + * + * There should not be any QPs still in use. + * Free memory for table. + */ +unsigned qib_free_all_qps(struct qib_devdata *dd) +{ + struct qib_ibdev *dev = &dd->verbs_dev; + unsigned long flags; + struct qib_qp *qp; + unsigned n, qp_inuse = 0; + + for (n = 0; n < dd->num_pports; n++) { + struct qib_ibport *ibp = &dd->pport[n].ibport_data; + + if (!qib_mcast_tree_empty(ibp)) + qp_inuse++; + rcu_read_lock(); + if (rcu_dereference(ibp->qp0)) + qp_inuse++; + if (rcu_dereference(ibp->qp1)) + qp_inuse++; + rcu_read_unlock(); + } + + spin_lock_irqsave(&dev->qpt_lock, flags); + for (n = 0; n < dev->qp_table_size; n++) { + qp = dev->qp_table[n]; + rcu_assign_pointer(dev->qp_table[n], NULL); + + for (; qp; qp = qp->next) + qp_inuse++; + } + spin_unlock_irqrestore(&dev->qpt_lock, flags); + synchronize_rcu(); + + return qp_inuse; +} + +/** + * qib_lookup_qpn - return the QP with the given QPN + * @qpt: the QP table + * @qpn: the QP number to look up + * + * The caller is responsible for decrementing the QP reference count + * when done. + */ +struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) +{ + struct qib_qp *qp = NULL; + + if (unlikely(qpn <= 1)) { + rcu_read_lock(); + if (qpn == 0) + qp = rcu_dereference(ibp->qp0); + else + qp = rcu_dereference(ibp->qp1); + } else { + struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; + unsigned n = qpn_hash(dev, qpn); + + rcu_read_lock(); + for (qp = dev->qp_table[n]; rcu_dereference(qp); qp = qp->next) + if (qp->ibqp.qp_num == qpn) + break; + } + if (qp) + if (unlikely(!atomic_inc_not_zero(&qp->refcount))) + qp = NULL; + + rcu_read_unlock(); + return qp; +} + +/** + * qib_reset_qp - initialize the QP state to the reset state + * @qp: the QP to reset + * @type: the QP type + */ +static void qib_reset_qp(struct qib_qp *qp, enum ib_qp_type type) +{ + qp->remote_qpn = 0; + qp->qkey = 0; + qp->qp_access_flags = 0; + atomic_set(&qp->s_dma_busy, 0); + qp->s_flags &= QIB_S_SIGNAL_REQ_WR; + qp->s_hdrwords = 0; + qp->s_wqe = NULL; + qp->s_draining = 0; + qp->s_next_psn = 0; + qp->s_last_psn = 0; + qp->s_sending_psn = 0; + qp->s_sending_hpsn = 0; + qp->s_psn = 0; + qp->r_psn = 0; + qp->r_msn = 0; + if (type == IB_QPT_RC) { + qp->s_state = IB_OPCODE_RC_SEND_LAST; + qp->r_state = IB_OPCODE_RC_SEND_LAST; + } else { + qp->s_state = IB_OPCODE_UC_SEND_LAST; + qp->r_state = IB_OPCODE_UC_SEND_LAST; + } + qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; + qp->r_nak_state = 0; + qp->r_aflags = 0; + qp->r_flags = 0; + qp->s_head = 0; + qp->s_tail = 0; + qp->s_cur = 0; + qp->s_acked = 0; + qp->s_last = 0; + qp->s_ssn = 1; + qp->s_lsn = 0; + qp->s_mig_state = IB_MIG_MIGRATED; + memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); + qp->r_head_ack_queue = 0; + qp->s_tail_ack_queue = 0; + qp->s_num_rd_atomic = 0; + if (qp->r_rq.wq) { + qp->r_rq.wq->head = 0; + qp->r_rq.wq->tail = 0; + } + qp->r_sge.num_sge = 0; +} + +static void clear_mr_refs(struct qib_qp *qp, int clr_sends) +{ + unsigned n; + + if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) + while (qp->s_rdma_read_sge.num_sge) { + atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount); + if (--qp->s_rdma_read_sge.num_sge) + qp->s_rdma_read_sge.sge = + *qp->s_rdma_read_sge.sg_list++; + } + + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } + + if (clr_sends) { + while (qp->s_last != qp->s_head) { + struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_last); + unsigned i; + + for (i = 0; i < wqe->wr.num_sge; i++) { + struct qib_sge *sge = &wqe->sg_list[i]; + + atomic_dec(&sge->mr->refcount); + } + if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI) + atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + } + if (qp->s_rdma_mr) { + atomic_dec(&qp->s_rdma_mr->refcount); + qp->s_rdma_mr = NULL; + } + } + + if (qp->ibqp.qp_type != IB_QPT_RC) + return; + + for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { + struct qib_ack_entry *e = &qp->s_ack_queue[n]; + + if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && + e->rdma_sge.mr) { + atomic_dec(&e->rdma_sge.mr->refcount); + e->rdma_sge.mr = NULL; + } + } +} + +/** + * qib_error_qp - put a QP into the error state + * @qp: the QP to put into the error state + * @err: the receive completion error to signal if a RWQE is active + * + * Flushes both send and receive work queues. + * Returns true if last WQE event should be generated. + * The QP r_lock and s_lock should be held and interrupts disabled. + * If we are already in error state, just return. + */ +int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) +{ + struct qib_ibdev *dev = to_idev(qp->ibqp.device); + struct ib_wc wc; + int ret = 0; + + if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET) + goto bail; + + qp->state = IB_QPS_ERR; + + if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) { + qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR); + del_timer(&qp->s_timer); + } + + if (qp->s_flags & QIB_S_ANY_WAIT_SEND) + qp->s_flags &= ~QIB_S_ANY_WAIT_SEND; + + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) { + qp->s_flags &= ~QIB_S_ANY_WAIT_IO; + list_del_init(&qp->iowait); + } + spin_unlock(&dev->pending_lock); + + if (!(qp->s_flags & QIB_S_BUSY)) { + qp->s_hdrwords = 0; + if (qp->s_rdma_mr) { + atomic_dec(&qp->s_rdma_mr->refcount); + qp->s_rdma_mr = NULL; + } + if (qp->s_tx) { + qib_put_txreq(qp->s_tx); + qp->s_tx = NULL; + } + } + + /* Schedule the sending tasklet to drain the send work queue. */ + if (qp->s_last != qp->s_head) + qib_schedule_send(qp); + + clear_mr_refs(qp, 0); + + memset(&wc, 0, sizeof(wc)); + wc.qp = &qp->ibqp; + wc.opcode = IB_WC_RECV; + + if (test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) { + wc.wr_id = qp->r_wr_id; + wc.status = err; + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + } + wc.status = IB_WC_WR_FLUSH_ERR; + + if (qp->r_rq.wq) { + struct qib_rwq *wq; + u32 head; + u32 tail; + + spin_lock(&qp->r_rq.lock); + + /* sanity check pointers before trusting them */ + wq = qp->r_rq.wq; + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + while (tail != head) { + wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; + if (++tail >= qp->r_rq.size) + tail = 0; + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + } + wq->tail = tail; + + spin_unlock(&qp->r_rq.lock); + } else if (qp->ibqp.event_handler) + ret = 1; + +bail: + return ret; +} + +/** + * qib_modify_qp - modify the attributes of a queue pair + * @ibqp: the queue pair who's attributes we're modifying + * @attr: the new attributes + * @attr_mask: the mask of attributes to modify + * @udata: user data for libibverbs.so + * + * Returns 0 on success, otherwise returns an errno. + */ +int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct qib_ibdev *dev = to_idev(ibqp->device); + struct qib_qp *qp = to_iqp(ibqp); + enum ib_qp_state cur_state, new_state; + struct ib_event ev; + int lastwqe = 0; + int mig = 0; + int ret; + u32 pmtu = 0; /* for gcc warning only */ + + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_lock); + + cur_state = attr_mask & IB_QP_CUR_STATE ? + attr->cur_qp_state : qp->state; + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, + attr_mask)) + goto inval; + + if (attr_mask & IB_QP_AV) { + if (attr->ah_attr.dlid >= QIB_MULTICAST_LID_BASE) + goto inval; + if (qib_check_ah(qp->ibqp.device, &attr->ah_attr)) + goto inval; + } + + if (attr_mask & IB_QP_ALT_PATH) { + if (attr->alt_ah_attr.dlid >= QIB_MULTICAST_LID_BASE) + goto inval; + if (qib_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) + goto inval; + if (attr->alt_pkey_index >= qib_get_npkeys(dd_from_dev(dev))) + goto inval; + } + + if (attr_mask & IB_QP_PKEY_INDEX) + if (attr->pkey_index >= qib_get_npkeys(dd_from_dev(dev))) + goto inval; + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + if (attr->min_rnr_timer > 31) + goto inval; + + if (attr_mask & IB_QP_PORT) + if (qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI || + attr->port_num == 0 || + attr->port_num > ibqp->device->phys_port_cnt) + goto inval; + + if (attr_mask & IB_QP_DEST_QPN) + if (attr->dest_qp_num > QIB_QPN_MASK) + goto inval; + + if (attr_mask & IB_QP_RETRY_CNT) + if (attr->retry_cnt > 7) + goto inval; + + if (attr_mask & IB_QP_RNR_RETRY) + if (attr->rnr_retry > 7) + goto inval; + + /* + * Don't allow invalid path_mtu values. OK to set greater + * than the active mtu (or even the max_cap, if we have tuned + * that to a small mtu. We'll set qp->path_mtu + * to the lesser of requested attribute mtu and active, + * for packetizing messages. + * Note that the QP port has to be set in INIT and MTU in RTR. + */ + if (attr_mask & IB_QP_PATH_MTU) { + struct qib_devdata *dd = dd_from_dev(dev); + int mtu, pidx = qp->port_num - 1; + + mtu = ib_mtu_enum_to_int(attr->path_mtu); + if (mtu == -1) + goto inval; + if (mtu > dd->pport[pidx].ibmtu) { + switch (dd->pport[pidx].ibmtu) { + case 4096: + pmtu = IB_MTU_4096; + break; + case 2048: + pmtu = IB_MTU_2048; + break; + case 1024: + pmtu = IB_MTU_1024; + break; + case 512: + pmtu = IB_MTU_512; + break; + case 256: + pmtu = IB_MTU_256; + break; + default: + pmtu = IB_MTU_2048; + } + } else + pmtu = attr->path_mtu; + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + if (attr->path_mig_state == IB_MIG_REARM) { + if (qp->s_mig_state == IB_MIG_ARMED) + goto inval; + if (new_state != IB_QPS_RTS) + goto inval; + } else if (attr->path_mig_state == IB_MIG_MIGRATED) { + if (qp->s_mig_state == IB_MIG_REARM) + goto inval; + if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD) + goto inval; + if (qp->s_mig_state == IB_MIG_ARMED) + mig = 1; + } else + goto inval; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + if (attr->max_dest_rd_atomic > QIB_MAX_RDMA_ATOMIC) + goto inval; + + switch (new_state) { + case IB_QPS_RESET: + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->iowait)) + list_del_init(&qp->iowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT); + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + /* Stop the sending work queue and retry timer */ + cancel_work_sync(&qp->s_work); + del_timer_sync(&qp->s_timer); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + if (qp->s_tx) { + qib_put_txreq(qp->s_tx); + qp->s_tx = NULL; + } + remove_qp(dev, qp); + wait_event(qp->wait, !atomic_read(&qp->refcount)); + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_lock); + clear_mr_refs(qp, 1); + qib_reset_qp(qp, ibqp->qp_type); + } + break; + + case IB_QPS_RTR: + /* Allow event to retrigger if QP set to RTR more than once */ + qp->r_flags &= ~QIB_R_COMM_EST; + qp->state = new_state; + break; + + case IB_QPS_SQD: + qp->s_draining = qp->s_last != qp->s_cur; + qp->state = new_state; + break; + + case IB_QPS_SQE: + if (qp->ibqp.qp_type == IB_QPT_RC) + goto inval; + qp->state = new_state; + break; + + case IB_QPS_ERR: + lastwqe = qib_error_qp(qp, IB_WC_WR_FLUSH_ERR); + break; + + default: + qp->state = new_state; + break; + } + + if (attr_mask & IB_QP_PKEY_INDEX) + qp->s_pkey_index = attr->pkey_index; + + if (attr_mask & IB_QP_PORT) + qp->port_num = attr->port_num; + + if (attr_mask & IB_QP_DEST_QPN) + qp->remote_qpn = attr->dest_qp_num; + + if (attr_mask & IB_QP_SQ_PSN) { + qp->s_next_psn = attr->sq_psn & QIB_PSN_MASK; + qp->s_psn = qp->s_next_psn; + qp->s_sending_psn = qp->s_next_psn; + qp->s_last_psn = qp->s_next_psn - 1; + qp->s_sending_hpsn = qp->s_last_psn; + } + + if (attr_mask & IB_QP_RQ_PSN) + qp->r_psn = attr->rq_psn & QIB_PSN_MASK; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->qp_access_flags = attr->qp_access_flags; + + if (attr_mask & IB_QP_AV) { + qp->remote_ah_attr = attr->ah_attr; + qp->s_srate = attr->ah_attr.static_rate; + } + + if (attr_mask & IB_QP_ALT_PATH) { + qp->alt_ah_attr = attr->alt_ah_attr; + qp->s_alt_pkey_index = attr->alt_pkey_index; + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + qp->s_mig_state = attr->path_mig_state; + if (mig) { + qp->remote_ah_attr = qp->alt_ah_attr; + qp->port_num = qp->alt_ah_attr.port_num; + qp->s_pkey_index = qp->s_alt_pkey_index; + } + } + + if (attr_mask & IB_QP_PATH_MTU) { + qp->path_mtu = pmtu; + qp->pmtu = ib_mtu_enum_to_int(pmtu); + } + + if (attr_mask & IB_QP_RETRY_CNT) { + qp->s_retry_cnt = attr->retry_cnt; + qp->s_retry = attr->retry_cnt; + } + + if (attr_mask & IB_QP_RNR_RETRY) { + qp->s_rnr_retry_cnt = attr->rnr_retry; + qp->s_rnr_retry = attr->rnr_retry; + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + qp->r_min_rnr_timer = attr->min_rnr_timer; + + if (attr_mask & IB_QP_TIMEOUT) { + qp->timeout = attr->timeout; + qp->timeout_jiffies = + usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / + 1000UL); + } + + if (attr_mask & IB_QP_QKEY) + qp->qkey = attr->qkey; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->r_max_rd_atomic = attr->max_dest_rd_atomic; + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + qp->s_max_rd_atomic = attr->max_rd_atomic; + + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + insert_qp(dev, qp); + + if (lastwqe) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + if (mig) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_PATH_MIG; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + ret = 0; + goto bail; + +inval: + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + ret = -EINVAL; + +bail: + return ret; +} + +int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr) +{ + struct qib_qp *qp = to_iqp(ibqp); + + attr->qp_state = qp->state; + attr->cur_qp_state = attr->qp_state; + attr->path_mtu = qp->path_mtu; + attr->path_mig_state = qp->s_mig_state; + attr->qkey = qp->qkey; + attr->rq_psn = qp->r_psn & QIB_PSN_MASK; + attr->sq_psn = qp->s_next_psn & QIB_PSN_MASK; + attr->dest_qp_num = qp->remote_qpn; + attr->qp_access_flags = qp->qp_access_flags; + attr->cap.max_send_wr = qp->s_size - 1; + attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1; + attr->cap.max_send_sge = qp->s_max_sge; + attr->cap.max_recv_sge = qp->r_rq.max_sge; + attr->cap.max_inline_data = 0; + attr->ah_attr = qp->remote_ah_attr; + attr->alt_ah_attr = qp->alt_ah_attr; + attr->pkey_index = qp->s_pkey_index; + attr->alt_pkey_index = qp->s_alt_pkey_index; + attr->en_sqd_async_notify = 0; + attr->sq_draining = qp->s_draining; + attr->max_rd_atomic = qp->s_max_rd_atomic; + attr->max_dest_rd_atomic = qp->r_max_rd_atomic; + attr->min_rnr_timer = qp->r_min_rnr_timer; + attr->port_num = qp->port_num; + attr->timeout = qp->timeout; + attr->retry_cnt = qp->s_retry_cnt; + attr->rnr_retry = qp->s_rnr_retry_cnt; + attr->alt_port_num = qp->alt_ah_attr.port_num; + attr->alt_timeout = qp->alt_timeout; + + init_attr->event_handler = qp->ibqp.event_handler; + init_attr->qp_context = qp->ibqp.qp_context; + init_attr->send_cq = qp->ibqp.send_cq; + init_attr->recv_cq = qp->ibqp.recv_cq; + init_attr->srq = qp->ibqp.srq; + init_attr->cap = attr->cap; + if (qp->s_flags & QIB_S_SIGNAL_REQ_WR) + init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; + else + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->qp_type = qp->ibqp.qp_type; + init_attr->port_num = qp->port_num; + return 0; +} + +/** + * qib_compute_aeth - compute the AETH (syndrome + MSN) + * @qp: the queue pair to compute the AETH for + * + * Returns the AETH. + */ +__be32 qib_compute_aeth(struct qib_qp *qp) +{ + u32 aeth = qp->r_msn & QIB_MSN_MASK; + + if (qp->ibqp.srq) { + /* + * Shared receive queues don't generate credits. + * Set the credit field to the invalid value. + */ + aeth |= QIB_AETH_CREDIT_INVAL << QIB_AETH_CREDIT_SHIFT; + } else { + u32 min, max, x; + u32 credits; + struct qib_rwq *wq = qp->r_rq.wq; + u32 head; + u32 tail; + + /* sanity check pointers before trusting them */ + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + /* + * Compute the number of credits available (RWQEs). + * XXX Not holding the r_rq.lock here so there is a small + * chance that the pair of reads are not atomic. + */ + credits = head - tail; + if ((int)credits < 0) + credits += qp->r_rq.size; + /* + * Binary search the credit table to find the code to + * use. + */ + min = 0; + max = 31; + for (;;) { + x = (min + max) / 2; + if (credit_table[x] == credits) + break; + if (credit_table[x] > credits) + max = x; + else if (min == x) + break; + else + min = x; + } + aeth |= x << QIB_AETH_CREDIT_SHIFT; + } + return cpu_to_be32(aeth); +} + +/** + * qib_create_qp - create a queue pair for a device + * @ibpd: the protection domain who's device we create the queue pair for + * @init_attr: the attributes of the queue pair + * @udata: user data for libibverbs.so + * + * Returns the queue pair on success, otherwise returns an errno. + * + * Called by the ib_create_qp() core verbs function. + */ +struct ib_qp *qib_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct qib_qp *qp; + int err; + struct qib_swqe *swq = NULL; + struct qib_ibdev *dev; + struct qib_devdata *dd; + size_t sz; + size_t sg_list_sz; + struct ib_qp *ret; + + if (init_attr->cap.max_send_sge > ib_qib_max_sges || + init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + + /* Check receive queue parameters if no SRQ is specified. */ + if (!init_attr->srq) { + if (init_attr->cap.max_recv_sge > ib_qib_max_sges || + init_attr->cap.max_recv_wr > ib_qib_max_qp_wrs) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + if (init_attr->cap.max_send_sge + + init_attr->cap.max_send_wr + + init_attr->cap.max_recv_sge + + init_attr->cap.max_recv_wr == 0) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + } + + switch (init_attr->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + if (init_attr->port_num == 0 || + init_attr->port_num > ibpd->device->phys_port_cnt) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + case IB_QPT_UC: + case IB_QPT_RC: + case IB_QPT_UD: + sz = sizeof(struct qib_sge) * + init_attr->cap.max_send_sge + + sizeof(struct qib_swqe); + swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); + if (swq == NULL) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + sz = sizeof(*qp); + sg_list_sz = 0; + if (init_attr->srq) { + struct qib_srq *srq = to_isrq(init_attr->srq); + + if (srq->rq.max_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (srq->rq.max_sge - 1); + } else if (init_attr->cap.max_recv_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (init_attr->cap.max_recv_sge - 1); + qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); + if (!qp) { + ret = ERR_PTR(-ENOMEM); + goto bail_swq; + } + RCU_INIT_POINTER(qp->next, NULL); + qp->timeout_jiffies = + usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / + 1000UL); + if (init_attr->srq) + sz = 0; + else { + qp->r_rq.size = init_attr->cap.max_recv_wr + 1; + qp->r_rq.max_sge = init_attr->cap.max_recv_sge; + sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + + sizeof(struct qib_rwqe); + qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) + + qp->r_rq.size * sz); + if (!qp->r_rq.wq) { + ret = ERR_PTR(-ENOMEM); + goto bail_qp; + } + } + + /* + * ib_create_qp() will initialize qp->ibqp + * except for qp->ibqp.qp_num. + */ + spin_lock_init(&qp->r_lock); + spin_lock_init(&qp->s_lock); + spin_lock_init(&qp->r_rq.lock); + atomic_set(&qp->refcount, 0); + init_waitqueue_head(&qp->wait); + init_waitqueue_head(&qp->wait_dma); + init_timer(&qp->s_timer); + qp->s_timer.data = (unsigned long)qp; + INIT_WORK(&qp->s_work, qib_do_send); + INIT_LIST_HEAD(&qp->iowait); + INIT_LIST_HEAD(&qp->rspwait); + qp->state = IB_QPS_RESET; + qp->s_wq = swq; + qp->s_size = init_attr->cap.max_send_wr + 1; + qp->s_max_sge = init_attr->cap.max_send_sge; + if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) + qp->s_flags = QIB_S_SIGNAL_REQ_WR; + dev = to_idev(ibpd->device); + dd = dd_from_dev(dev); + err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type, + init_attr->port_num); + if (err < 0) { + ret = ERR_PTR(err); + vfree(qp->r_rq.wq); + goto bail_qp; + } + qp->ibqp.qp_num = err; + qp->port_num = init_attr->port_num; + qib_reset_qp(qp, init_attr->qp_type); + break; + + default: + /* Don't support raw QPs */ + ret = ERR_PTR(-ENOSYS); + goto bail; + } + + init_attr->cap.max_inline_data = 0; + + /* + * Return the address of the RWQ as the offset to mmap. + * See qib_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + if (!qp->r_rq.wq) { + __u64 offset = 0; + + err = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } else { + u32 s = sizeof(struct qib_rwq) + qp->r_rq.size * sz; + + qp->ip = qib_create_mmap_info(dev, s, + ibpd->uobject->context, + qp->r_rq.wq); + if (!qp->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + err = ib_copy_to_udata(udata, &(qp->ip->offset), + sizeof(qp->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } + } + + spin_lock(&dev->n_qps_lock); + if (dev->n_qps_allocated == ib_qib_max_qps) { + spin_unlock(&dev->n_qps_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_qps_allocated++; + spin_unlock(&dev->n_qps_lock); + + if (qp->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + ret = &qp->ibqp; + goto bail; + +bail_ip: + if (qp->ip) + kref_put(&qp->ip->ref, qib_release_mmap_info); + else + vfree(qp->r_rq.wq); + free_qpn(&dev->qpn_table, qp->ibqp.qp_num); +bail_qp: + kfree(qp); +bail_swq: + vfree(swq); +bail: + return ret; +} + +/** + * qib_destroy_qp - destroy a queue pair + * @ibqp: the queue pair to destroy + * + * Returns 0 on success. + * + * Note that this can be called while the QP is actively sending or + * receiving! + */ +int qib_destroy_qp(struct ib_qp *ibqp) +{ + struct qib_qp *qp = to_iqp(ibqp); + struct qib_ibdev *dev = to_idev(ibqp->device); + + /* Make sure HW and driver activity is stopped. */ + spin_lock_irq(&qp->s_lock); + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->iowait)) + list_del_init(&qp->iowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT); + spin_unlock_irq(&qp->s_lock); + cancel_work_sync(&qp->s_work); + del_timer_sync(&qp->s_timer); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + if (qp->s_tx) { + qib_put_txreq(qp->s_tx); + qp->s_tx = NULL; + } + remove_qp(dev, qp); + wait_event(qp->wait, !atomic_read(&qp->refcount)); + clear_mr_refs(qp, 1); + } else + spin_unlock_irq(&qp->s_lock); + + /* all user's cleaned up, mark it available */ + free_qpn(&dev->qpn_table, qp->ibqp.qp_num); + spin_lock(&dev->n_qps_lock); + dev->n_qps_allocated--; + spin_unlock(&dev->n_qps_lock); + + if (qp->ip) + kref_put(&qp->ip->ref, qib_release_mmap_info); + else + vfree(qp->r_rq.wq); + vfree(qp->s_wq); + kfree(qp); + return 0; +} + +/** + * qib_init_qpn_table - initialize the QP number table for a device + * @qpt: the QPN table + */ +void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt) +{ + spin_lock_init(&qpt->lock); + qpt->last = 1; /* start with QPN 2 */ + qpt->nmaps = 1; + qpt->mask = dd->qpn_mask; +} + +/** + * qib_free_qpn_table - free the QP number table for a device + * @qpt: the QPN table + */ +void qib_free_qpn_table(struct qib_qpn_table *qpt) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(qpt->map); i++) + if (qpt->map[i].page) + free_page((unsigned long) qpt->map[i].page); +} + +/** + * qib_get_credit - flush the send work queue of a QP + * @qp: the qp who's send work queue to flush + * @aeth: the Acknowledge Extended Transport Header + * + * The QP s_lock should be held. + */ +void qib_get_credit(struct qib_qp *qp, u32 aeth) +{ + u32 credit = (aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK; + + /* + * If the credit is invalid, we can send + * as many packets as we like. Otherwise, we have to + * honor the credit field. + */ + if (credit == QIB_AETH_CREDIT_INVAL) { + if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) { + qp->s_flags |= QIB_S_UNLIMITED_CREDIT; + if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) { + qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT; + qib_schedule_send(qp); + } + } + } else if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) { + /* Compute new LSN (i.e., MSN + credit) */ + credit = (aeth + credit_table[credit]) & QIB_MSN_MASK; + if (qib_cmp24(credit, qp->s_lsn) > 0) { + qp->s_lsn = credit; + if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) { + qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT; + qib_schedule_send(qp); + } + } + } +} diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c new file mode 100644 index 00000000..fa71b1e6 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_qsfp.c @@ -0,0 +1,556 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "qib.h" +#include "qib_qsfp.h" + +/* + * QSFP support for ib_qib driver, using "Two Wire Serial Interface" driver + * in qib_twsi.c + */ +#define QSFP_MAX_RETRY 4 + +static int qsfp_read(struct qib_pportdata *ppd, int addr, void *bp, int len) +{ + struct qib_devdata *dd = ppd->dd; + u32 out, mask; + int ret, cnt, pass = 0; + int stuck = 0; + u8 *buff = bp; + + ret = mutex_lock_interruptible(&dd->eep_lock); + if (ret) + goto no_unlock; + + if (dd->twsi_eeprom_dev == QIB_TWSI_NO_DEV) { + ret = -ENXIO; + goto bail; + } + + /* + * We presume, if we are called at all, that this board has + * QSFP. This is on the same i2c chain as the legacy parts, + * but only responds if the module is selected via GPIO pins. + * Further, there are very long setup and hold requirements + * on MODSEL. + */ + mask = QSFP_GPIO_MOD_SEL_N | QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE; + out = QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE; + if (ppd->hw_pidx) { + mask <<= QSFP_GPIO_PORT2_SHIFT; + out <<= QSFP_GPIO_PORT2_SHIFT; + } + + dd->f_gpio_mod(dd, out, mask, mask); + + /* + * Module could take up to 2 Msec to respond to MOD_SEL, and there + * is no way to tell if it is ready, so we must wait. + */ + msleep(2); + + /* Make sure TWSI bus is in sane state. */ + ret = qib_twsi_reset(dd); + if (ret) { + qib_dev_porterr(dd, ppd->port, + "QSFP interface Reset for read failed\n"); + ret = -EIO; + stuck = 1; + goto deselect; + } + + /* All QSFP modules are at A0 */ + + cnt = 0; + while (cnt < len) { + unsigned in_page; + int wlen = len - cnt; + in_page = addr % QSFP_PAGESIZE; + if ((in_page + wlen) > QSFP_PAGESIZE) + wlen = QSFP_PAGESIZE - in_page; + ret = qib_twsi_blk_rd(dd, QSFP_DEV, addr, buff + cnt, wlen); + /* Some QSFP's fail first try. Retry as experiment */ + if (ret && cnt == 0 && ++pass < QSFP_MAX_RETRY) + continue; + if (ret) { + /* qib_twsi_blk_rd() 1 for error, else 0 */ + ret = -EIO; + goto deselect; + } + addr += wlen; + cnt += wlen; + } + ret = cnt; + +deselect: + /* + * Module could take up to 10 uSec after transfer before + * ready to respond to MOD_SEL negation, and there is no way + * to tell if it is ready, so we must wait. + */ + udelay(10); + /* set QSFP MODSEL, RST. LP all high */ + dd->f_gpio_mod(dd, mask, mask, mask); + + /* + * Module could take up to 2 Msec to respond to MOD_SEL + * going away, and there is no way to tell if it is ready. + * so we must wait. + */ + if (stuck) + qib_dev_err(dd, "QSFP interface bus stuck non-idle\n"); + + if (pass >= QSFP_MAX_RETRY && ret) + qib_dev_porterr(dd, ppd->port, "QSFP failed even retrying\n"); + else if (pass) + qib_dev_porterr(dd, ppd->port, "QSFP retries: %d\n", pass); + + msleep(2); + +bail: + mutex_unlock(&dd->eep_lock); + +no_unlock: + return ret; +} + +/* + * qsfp_write + * We do not ordinarily write the QSFP, but this is needed to select + * the page on non-flat QSFPs, and possibly later unusual cases + */ +static int qib_qsfp_write(struct qib_pportdata *ppd, int addr, void *bp, + int len) +{ + struct qib_devdata *dd = ppd->dd; + u32 out, mask; + int ret, cnt; + u8 *buff = bp; + + ret = mutex_lock_interruptible(&dd->eep_lock); + if (ret) + goto no_unlock; + + if (dd->twsi_eeprom_dev == QIB_TWSI_NO_DEV) { + ret = -ENXIO; + goto bail; + } + + /* + * We presume, if we are called at all, that this board has + * QSFP. This is on the same i2c chain as the legacy parts, + * but only responds if the module is selected via GPIO pins. + * Further, there are very long setup and hold requirements + * on MODSEL. + */ + mask = QSFP_GPIO_MOD_SEL_N | QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE; + out = QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE; + if (ppd->hw_pidx) { + mask <<= QSFP_GPIO_PORT2_SHIFT; + out <<= QSFP_GPIO_PORT2_SHIFT; + } + dd->f_gpio_mod(dd, out, mask, mask); + + /* + * Module could take up to 2 Msec to respond to MOD_SEL, + * and there is no way to tell if it is ready, so we must wait. + */ + msleep(2); + + /* Make sure TWSI bus is in sane state. */ + ret = qib_twsi_reset(dd); + if (ret) { + qib_dev_porterr(dd, ppd->port, + "QSFP interface Reset for write failed\n"); + ret = -EIO; + goto deselect; + } + + /* All QSFP modules are at A0 */ + + cnt = 0; + while (cnt < len) { + unsigned in_page; + int wlen = len - cnt; + in_page = addr % QSFP_PAGESIZE; + if ((in_page + wlen) > QSFP_PAGESIZE) + wlen = QSFP_PAGESIZE - in_page; + ret = qib_twsi_blk_wr(dd, QSFP_DEV, addr, buff + cnt, wlen); + if (ret) { + /* qib_twsi_blk_wr() 1 for error, else 0 */ + ret = -EIO; + goto deselect; + } + addr += wlen; + cnt += wlen; + } + ret = cnt; + +deselect: + /* + * Module could take up to 10 uSec after transfer before + * ready to respond to MOD_SEL negation, and there is no way + * to tell if it is ready, so we must wait. + */ + udelay(10); + /* set QSFP MODSEL, RST, LP high */ + dd->f_gpio_mod(dd, mask, mask, mask); + /* + * Module could take up to 2 Msec to respond to MOD_SEL + * going away, and there is no way to tell if it is ready. + * so we must wait. + */ + msleep(2); + +bail: + mutex_unlock(&dd->eep_lock); + +no_unlock: + return ret; +} + +/* + * For validation, we want to check the checksums, even of the + * fields we do not otherwise use. This function reads the bytes from + * to and returns the 8lsbs of the sum, or <0 for errors + */ +static int qsfp_cks(struct qib_pportdata *ppd, int first, int next) +{ + int ret; + u16 cks; + u8 bval; + + cks = 0; + while (first < next) { + ret = qsfp_read(ppd, first, &bval, 1); + if (ret < 0) + goto bail; + cks += bval; + ++first; + } + ret = cks & 0xFF; +bail: + return ret; + +} + +int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp) +{ + int ret; + int idx; + u16 cks; + u8 peek[4]; + + /* ensure sane contents on invalid reads, for cable swaps */ + memset(cp, 0, sizeof(*cp)); + + if (!qib_qsfp_mod_present(ppd)) { + ret = -ENODEV; + goto bail; + } + + ret = qsfp_read(ppd, 0, peek, 3); + if (ret < 0) + goto bail; + if ((peek[0] & 0xFE) != 0x0C) + qib_dev_porterr(ppd->dd, ppd->port, + "QSFP byte0 is 0x%02X, S/B 0x0C/D\n", peek[0]); + + if ((peek[2] & 2) == 0) { + /* + * If cable is paged, rather than "flat memory", we need to + * set the page to zero, Even if it already appears to be zero. + */ + u8 poke = 0; + ret = qib_qsfp_write(ppd, 127, &poke, 1); + udelay(50); + if (ret != 1) { + qib_dev_porterr(ppd->dd, ppd->port, + "Failed QSFP Page set\n"); + goto bail; + } + } + + ret = qsfp_read(ppd, QSFP_MOD_ID_OFFS, &cp->id, 1); + if (ret < 0) + goto bail; + if ((cp->id & 0xFE) != 0x0C) + qib_dev_porterr(ppd->dd, ppd->port, + "QSFP ID byte is 0x%02X, S/B 0x0C/D\n", cp->id); + cks = cp->id; + + ret = qsfp_read(ppd, QSFP_MOD_PWR_OFFS, &cp->pwr, 1); + if (ret < 0) + goto bail; + cks += cp->pwr; + + ret = qsfp_cks(ppd, QSFP_MOD_PWR_OFFS + 1, QSFP_MOD_LEN_OFFS); + if (ret < 0) + goto bail; + cks += ret; + + ret = qsfp_read(ppd, QSFP_MOD_LEN_OFFS, &cp->len, 1); + if (ret < 0) + goto bail; + cks += cp->len; + + ret = qsfp_read(ppd, QSFP_MOD_TECH_OFFS, &cp->tech, 1); + if (ret < 0) + goto bail; + cks += cp->tech; + + ret = qsfp_read(ppd, QSFP_VEND_OFFS, &cp->vendor, QSFP_VEND_LEN); + if (ret < 0) + goto bail; + for (idx = 0; idx < QSFP_VEND_LEN; ++idx) + cks += cp->vendor[idx]; + + ret = qsfp_read(ppd, QSFP_IBXCV_OFFS, &cp->xt_xcv, 1); + if (ret < 0) + goto bail; + cks += cp->xt_xcv; + + ret = qsfp_read(ppd, QSFP_VOUI_OFFS, &cp->oui, QSFP_VOUI_LEN); + if (ret < 0) + goto bail; + for (idx = 0; idx < QSFP_VOUI_LEN; ++idx) + cks += cp->oui[idx]; + + ret = qsfp_read(ppd, QSFP_PN_OFFS, &cp->partnum, QSFP_PN_LEN); + if (ret < 0) + goto bail; + for (idx = 0; idx < QSFP_PN_LEN; ++idx) + cks += cp->partnum[idx]; + + ret = qsfp_read(ppd, QSFP_REV_OFFS, &cp->rev, QSFP_REV_LEN); + if (ret < 0) + goto bail; + for (idx = 0; idx < QSFP_REV_LEN; ++idx) + cks += cp->rev[idx]; + + ret = qsfp_read(ppd, QSFP_ATTEN_OFFS, &cp->atten, QSFP_ATTEN_LEN); + if (ret < 0) + goto bail; + for (idx = 0; idx < QSFP_ATTEN_LEN; ++idx) + cks += cp->atten[idx]; + + ret = qsfp_cks(ppd, QSFP_ATTEN_OFFS + QSFP_ATTEN_LEN, QSFP_CC_OFFS); + if (ret < 0) + goto bail; + cks += ret; + + cks &= 0xFF; + ret = qsfp_read(ppd, QSFP_CC_OFFS, &cp->cks1, 1); + if (ret < 0) + goto bail; + if (cks != cp->cks1) + qib_dev_porterr(ppd->dd, ppd->port, + "QSFP cks1 is %02X, computed %02X\n", cp->cks1, + cks); + + /* Second checksum covers 192 to (serial, date, lot) */ + ret = qsfp_cks(ppd, QSFP_CC_OFFS + 1, QSFP_SN_OFFS); + if (ret < 0) + goto bail; + cks = ret; + + ret = qsfp_read(ppd, QSFP_SN_OFFS, &cp->serial, QSFP_SN_LEN); + if (ret < 0) + goto bail; + for (idx = 0; idx < QSFP_SN_LEN; ++idx) + cks += cp->serial[idx]; + + ret = qsfp_read(ppd, QSFP_DATE_OFFS, &cp->date, QSFP_DATE_LEN); + if (ret < 0) + goto bail; + for (idx = 0; idx < QSFP_DATE_LEN; ++idx) + cks += cp->date[idx]; + + ret = qsfp_read(ppd, QSFP_LOT_OFFS, &cp->lot, QSFP_LOT_LEN); + if (ret < 0) + goto bail; + for (idx = 0; idx < QSFP_LOT_LEN; ++idx) + cks += cp->lot[idx]; + + ret = qsfp_cks(ppd, QSFP_LOT_OFFS + QSFP_LOT_LEN, QSFP_CC_EXT_OFFS); + if (ret < 0) + goto bail; + cks += ret; + + ret = qsfp_read(ppd, QSFP_CC_EXT_OFFS, &cp->cks2, 1); + if (ret < 0) + goto bail; + cks &= 0xFF; + if (cks != cp->cks2) + qib_dev_porterr(ppd->dd, ppd->port, + "QSFP cks2 is %02X, computed %02X\n", cp->cks2, + cks); + return 0; + +bail: + cp->id = 0; + return ret; +} + +const char * const qib_qsfp_devtech[16] = { + "850nm VCSEL", "1310nm VCSEL", "1550nm VCSEL", "1310nm FP", + "1310nm DFB", "1550nm DFB", "1310nm EML", "1550nm EML", + "Cu Misc", "1490nm DFB", "Cu NoEq", "Cu Eq", + "Undef", "Cu Active BothEq", "Cu FarEq", "Cu NearEq" +}; + +#define QSFP_DUMP_CHUNK 16 /* Holds longest string */ +#define QSFP_DEFAULT_HDR_CNT 224 + +static const char *pwr_codes = "1.5W2.0W2.5W3.5W"; + +int qib_qsfp_mod_present(struct qib_pportdata *ppd) +{ + u32 mask; + int ret; + + mask = QSFP_GPIO_MOD_PRS_N << + (ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT); + ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0); + + return !((ret & mask) >> + ((ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT) + 3)); +} + +/* + * Initialize structures that control access to QSFP. Called once per port + * on cards that support QSFP. + */ +void qib_qsfp_init(struct qib_qsfp_data *qd, + void (*fevent)(struct work_struct *)) +{ + u32 mask, highs; + + struct qib_devdata *dd = qd->ppd->dd; + + /* Initialize work struct for later QSFP events */ + INIT_WORK(&qd->work, fevent); + + /* + * Later, we may want more validation. For now, just set up pins and + * blip reset. If module is present, call qib_refresh_qsfp_cache(), + * to do further init. + */ + mask = QSFP_GPIO_MOD_SEL_N | QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE; + highs = mask - QSFP_GPIO_MOD_RST_N; + if (qd->ppd->hw_pidx) { + mask <<= QSFP_GPIO_PORT2_SHIFT; + highs <<= QSFP_GPIO_PORT2_SHIFT; + } + dd->f_gpio_mod(dd, highs, mask, mask); + udelay(20); /* Generous RST dwell */ + + dd->f_gpio_mod(dd, mask, mask, mask); + return; +} + +void qib_qsfp_deinit(struct qib_qsfp_data *qd) +{ + /* + * There is nothing to do here for now. our work is scheduled + * with queue_work(), and flush_workqueue() from remove_one + * will block until all work setup with queue_work() + * completes. + */ +} + +int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len) +{ + struct qib_qsfp_cache cd; + u8 bin_buff[QSFP_DUMP_CHUNK]; + char lenstr[6]; + int sofar, ret; + int bidx = 0; + + sofar = 0; + ret = qib_refresh_qsfp_cache(ppd, &cd); + if (ret < 0) + goto bail; + + lenstr[0] = ' '; + lenstr[1] = '\0'; + if (QSFP_IS_CU(cd.tech)) + sprintf(lenstr, "%dM ", cd.len); + + sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n", pwr_codes + + (QSFP_PWR(cd.pwr) * 4)); + + sofar += scnprintf(buf + sofar, len - sofar, "TECH:%s%s\n", lenstr, + qib_qsfp_devtech[cd.tech >> 4]); + + sofar += scnprintf(buf + sofar, len - sofar, "Vendor:%.*s\n", + QSFP_VEND_LEN, cd.vendor); + + sofar += scnprintf(buf + sofar, len - sofar, "OUI:%06X\n", + QSFP_OUI(cd.oui)); + + sofar += scnprintf(buf + sofar, len - sofar, "Part#:%.*s\n", + QSFP_PN_LEN, cd.partnum); + sofar += scnprintf(buf + sofar, len - sofar, "Rev:%.*s\n", + QSFP_REV_LEN, cd.rev); + if (QSFP_IS_CU(cd.tech)) + sofar += scnprintf(buf + sofar, len - sofar, "Atten:%d, %d\n", + QSFP_ATTEN_SDR(cd.atten), + QSFP_ATTEN_DDR(cd.atten)); + sofar += scnprintf(buf + sofar, len - sofar, "Serial:%.*s\n", + QSFP_SN_LEN, cd.serial); + sofar += scnprintf(buf + sofar, len - sofar, "Date:%.*s\n", + QSFP_DATE_LEN, cd.date); + sofar += scnprintf(buf + sofar, len - sofar, "Lot:%.*s\n", + QSFP_LOT_LEN, cd.date); + + while (bidx < QSFP_DEFAULT_HDR_CNT) { + int iidx; + ret = qsfp_read(ppd, bidx, bin_buff, QSFP_DUMP_CHUNK); + if (ret < 0) + goto bail; + for (iidx = 0; iidx < ret; ++iidx) { + sofar += scnprintf(buf + sofar, len-sofar, " %02X", + bin_buff[iidx]); + } + sofar += scnprintf(buf + sofar, len - sofar, "\n"); + bidx += QSFP_DUMP_CHUNK; + } + ret = sofar; +bail: + return ret; +} diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h new file mode 100644 index 00000000..91908f53 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_qsfp.h @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +/* QSFP support common definitions, for ib_qib driver */ + +#define QSFP_DEV 0xA0 +#define QSFP_PWR_LAG_MSEC 2000 +#define QSFP_MODPRS_LAG_MSEC 20 + +/* + * Below are masks for various QSFP signals, for Port 1. + * Port2 equivalents are shifted by QSFP_GPIO_PORT2_SHIFT. + * _N means asserted low + */ +#define QSFP_GPIO_MOD_SEL_N (4) +#define QSFP_GPIO_MOD_PRS_N (8) +#define QSFP_GPIO_INT_N (0x10) +#define QSFP_GPIO_MOD_RST_N (0x20) +#define QSFP_GPIO_LP_MODE (0x40) +#define QSFP_GPIO_PORT2_SHIFT 5 + +#define QSFP_PAGESIZE 128 +/* Defined fields that QLogic requires of qualified cables */ +/* Byte 0 is Identifier, not checked */ +/* Byte 1 is reserved "status MSB" */ +/* Byte 2 is "status LSB" We only care that D2 "Flat Mem" is set. */ +/* + * Rest of first 128 not used, although 127 is reserved for page select + * if module is not "Flat memory". + */ +/* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */ +#define QSFP_MOD_ID_OFFS 128 +/* + * Byte 129 is "Extended Identifier". We only care about D7,D6: Power class + * 0:1.5W, 1:2.0W, 2:2.5W, 3:3.5W + */ +#define QSFP_MOD_PWR_OFFS 129 +/* Byte 130 is Connector type. Not QLogic req'd */ +/* Bytes 131..138 are Transceiver types, bit maps for various tech, none IB */ +/* Byte 139 is encoding. code 0x01 is 8b10b. Not QLogic req'd */ +/* byte 140 is nominal bit-rate, in units of 100Mbits/sec Not QLogic req'd */ +/* Byte 141 is Extended Rate Select. Not QLogic req'd */ +/* Bytes 142..145 are lengths for various fiber types. Not QLogic req'd */ +/* Byte 146 is length for Copper. Units of 1 meter */ +#define QSFP_MOD_LEN_OFFS 146 +/* + * Byte 147 is Device technology. D0..3 not Qlogc req'd + * D4..7 select from 15 choices, translated by table: + */ +#define QSFP_MOD_TECH_OFFS 147 +extern const char *const qib_qsfp_devtech[16]; +/* Active Equalization includes fiber, copper full EQ, and copper near Eq */ +#define QSFP_IS_ACTIVE(tech) ((0xA2FF >> ((tech) >> 4)) & 1) +/* Active Equalization includes fiber, copper full EQ, and copper far Eq */ +#define QSFP_IS_ACTIVE_FAR(tech) ((0x32FF >> ((tech) >> 4)) & 1) +/* Attenuation should be valid for copper other than full/near Eq */ +#define QSFP_HAS_ATTEN(tech) ((0x4D00 >> ((tech) >> 4)) & 1) +/* Length is only valid if technology is "copper" */ +#define QSFP_IS_CU(tech) ((0xED00 >> ((tech) >> 4)) & 1) +#define QSFP_TECH_1490 9 + +#define QSFP_OUI(oui) (((unsigned)oui[0] << 16) | ((unsigned)oui[1] << 8) | \ + oui[2]) +#define QSFP_OUI_AMPHENOL 0x415048 +#define QSFP_OUI_FINISAR 0x009065 +#define QSFP_OUI_GORE 0x002177 + +/* Bytes 148..163 are Vendor Name, Left-justified Blank-filled */ +#define QSFP_VEND_OFFS 148 +#define QSFP_VEND_LEN 16 +/* Byte 164 is IB Extended tranceiver codes Bits D0..3 are SDR,DDR,QDR,EDR */ +#define QSFP_IBXCV_OFFS 164 +/* Bytes 165..167 are Vendor OUI number */ +#define QSFP_VOUI_OFFS 165 +#define QSFP_VOUI_LEN 3 +/* Bytes 168..183 are Vendor Part Number, string */ +#define QSFP_PN_OFFS 168 +#define QSFP_PN_LEN 16 +/* Bytes 184,185 are Vendor Rev. Left Justified, Blank-filled */ +#define QSFP_REV_OFFS 184 +#define QSFP_REV_LEN 2 +/* + * Bytes 186,187 are Wavelength, if Optical. Not Qlogic req'd + * If copper, they are attenuation in dB: + * Byte 186 is at 2.5Gb/sec (SDR), Byte 187 at 5.0Gb/sec (DDR) + */ +#define QSFP_ATTEN_OFFS 186 +#define QSFP_ATTEN_LEN 2 +/* Bytes 188,189 are Wavelength tolerance, not QLogic req'd */ +/* Byte 190 is Max Case Temp. Not QLogic req'd */ +/* Byte 191 is LSB of sum of bytes 128..190. Not QLogic req'd */ +#define QSFP_CC_OFFS 191 +/* Bytes 192..195 are Options implemented in qsfp. Not Qlogic req'd */ +/* Bytes 196..211 are Serial Number, String */ +#define QSFP_SN_OFFS 196 +#define QSFP_SN_LEN 16 +/* Bytes 212..219 are date-code YYMMDD (MM==1 for Jan) */ +#define QSFP_DATE_OFFS 212 +#define QSFP_DATE_LEN 6 +/* Bytes 218,219 are optional lot-code, string */ +#define QSFP_LOT_OFFS 218 +#define QSFP_LOT_LEN 2 +/* Bytes 220, 221 indicate monitoring options, Not QLogic req'd */ +/* Byte 223 is LSB of sum of bytes 192..222 */ +#define QSFP_CC_EXT_OFFS 223 + +/* + * struct qib_qsfp_data encapsulates state of QSFP device for one port. + * it will be part of port-chip-specific data if a board supports QSFP. + * + * Since multiple board-types use QSFP, and their pport_data structs + * differ (in the chip-specific section), we need a pointer to its head. + * + * Avoiding premature optimization, we will have one work_struct per port, + * and let the (increasingly inaccurately named) eep_lock arbitrate + * access to common resources. + * + */ + +/* + * Hold the parts of the onboard EEPROM that we care about, so we aren't + * coonstantly bit-boffing + */ +struct qib_qsfp_cache { + u8 id; /* must be 0x0C or 0x0D; 0 indicates invalid EEPROM read */ + u8 pwr; /* in D6,7 */ + u8 len; /* in meters, Cu only */ + u8 tech; + char vendor[QSFP_VEND_LEN]; + u8 xt_xcv; /* Ext. tranceiver codes, 4 lsbs are IB speed supported */ + u8 oui[QSFP_VOUI_LEN]; + u8 partnum[QSFP_PN_LEN]; + u8 rev[QSFP_REV_LEN]; + u8 atten[QSFP_ATTEN_LEN]; + u8 cks1; /* Checksum of bytes 128..190 */ + u8 serial[QSFP_SN_LEN]; + u8 date[QSFP_DATE_LEN]; + u8 lot[QSFP_LOT_LEN]; + u8 cks2; /* Checsum of bytes 192..222 */ +}; + +#define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3) +#define QSFP_ATTEN_SDR(attenarray) (attenarray[0]) +#define QSFP_ATTEN_DDR(attenarray) (attenarray[1]) + +struct qib_qsfp_data { + /* Helps to find our way */ + struct qib_pportdata *ppd; + struct work_struct work; + struct qib_qsfp_cache cache; + unsigned long t_insert; + u8 modpresent; +}; + +extern int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, + struct qib_qsfp_cache *cp); +extern int qib_qsfp_mod_present(struct qib_pportdata *ppd); +extern void qib_qsfp_init(struct qib_qsfp_data *qd, + void (*fevent)(struct work_struct *)); +extern void qib_qsfp_deinit(struct qib_qsfp_data *qd); diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c new file mode 100644 index 00000000..765b4cba --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -0,0 +1,2294 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "qib.h" + +/* cut down ridiculously long IB macro names */ +#define OP(x) IB_OPCODE_RC_##x + +static void rc_timeout(unsigned long arg); + +static u32 restart_sge(struct qib_sge_state *ss, struct qib_swqe *wqe, + u32 psn, u32 pmtu) +{ + u32 len; + + len = ((psn - wqe->psn) & QIB_PSN_MASK) * pmtu; + ss->sge = wqe->sg_list[0]; + ss->sg_list = wqe->sg_list + 1; + ss->num_sge = wqe->wr.num_sge; + ss->total_len = wqe->length; + qib_skip_sge(ss, len, 0); + return wqe->length - len; +} + +static void start_timer(struct qib_qp *qp) +{ + qp->s_flags |= QIB_S_TIMER; + qp->s_timer.function = rc_timeout; + /* 4.096 usec. * (1 << qp->timeout) */ + qp->s_timer.expires = jiffies + qp->timeout_jiffies; + add_timer(&qp->s_timer); +} + +/** + * qib_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read) + * @dev: the device for this QP + * @qp: a pointer to the QP + * @ohdr: a pointer to the IB header being constructed + * @pmtu: the path MTU + * + * Return 1 if constructed; otherwise, return 0. + * Note that we are in the responder's side of the QP context. + * Note the QP s_lock must be held. + */ +static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, + struct qib_other_headers *ohdr, u32 pmtu) +{ + struct qib_ack_entry *e; + u32 hwords; + u32 len; + u32 bth0; + u32 bth2; + + /* Don't send an ACK if we aren't supposed to. */ + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) + goto bail; + + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ + hwords = 5; + + switch (qp->s_ack_state) { + case OP(RDMA_READ_RESPONSE_LAST): + case OP(RDMA_READ_RESPONSE_ONLY): + e = &qp->s_ack_queue[qp->s_tail_ack_queue]; + if (e->rdma_sge.mr) { + atomic_dec(&e->rdma_sge.mr->refcount); + e->rdma_sge.mr = NULL; + } + /* FALLTHROUGH */ + case OP(ATOMIC_ACKNOWLEDGE): + /* + * We can increment the tail pointer now that the last + * response has been sent instead of only being + * constructed. + */ + if (++qp->s_tail_ack_queue > QIB_MAX_RDMA_ATOMIC) + qp->s_tail_ack_queue = 0; + /* FALLTHROUGH */ + case OP(SEND_ONLY): + case OP(ACKNOWLEDGE): + /* Check for no next entry in the queue. */ + if (qp->r_head_ack_queue == qp->s_tail_ack_queue) { + if (qp->s_flags & QIB_S_ACK_PENDING) + goto normal; + goto bail; + } + + e = &qp->s_ack_queue[qp->s_tail_ack_queue]; + if (e->opcode == OP(RDMA_READ_REQUEST)) { + /* + * If a RDMA read response is being resent and + * we haven't seen the duplicate request yet, + * then stop sending the remaining responses the + * responder has seen until the requester resends it. + */ + len = e->rdma_sge.sge_length; + if (len && !e->rdma_sge.mr) { + qp->s_tail_ack_queue = qp->r_head_ack_queue; + goto bail; + } + /* Copy SGE state in case we need to resend */ + qp->s_rdma_mr = e->rdma_sge.mr; + if (qp->s_rdma_mr) + atomic_inc(&qp->s_rdma_mr->refcount); + qp->s_ack_rdma_sge.sge = e->rdma_sge; + qp->s_ack_rdma_sge.num_sge = 1; + qp->s_cur_sge = &qp->s_ack_rdma_sge; + if (len > pmtu) { + len = pmtu; + qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); + } else { + qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); + e->sent = 1; + } + ohdr->u.aeth = qib_compute_aeth(qp); + hwords++; + qp->s_ack_rdma_psn = e->psn; + bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK; + } else { + /* COMPARE_SWAP or FETCH_ADD */ + qp->s_cur_sge = NULL; + len = 0; + qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); + ohdr->u.at.aeth = qib_compute_aeth(qp); + ohdr->u.at.atomic_ack_eth[0] = + cpu_to_be32(e->atomic_data >> 32); + ohdr->u.at.atomic_ack_eth[1] = + cpu_to_be32(e->atomic_data); + hwords += sizeof(ohdr->u.at) / sizeof(u32); + bth2 = e->psn & QIB_PSN_MASK; + e->sent = 1; + } + bth0 = qp->s_ack_state << 24; + break; + + case OP(RDMA_READ_RESPONSE_FIRST): + qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); + /* FALLTHROUGH */ + case OP(RDMA_READ_RESPONSE_MIDDLE): + qp->s_cur_sge = &qp->s_ack_rdma_sge; + qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr; + if (qp->s_rdma_mr) + atomic_inc(&qp->s_rdma_mr->refcount); + len = qp->s_ack_rdma_sge.sge.sge_length; + if (len > pmtu) + len = pmtu; + else { + ohdr->u.aeth = qib_compute_aeth(qp); + hwords++; + qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); + e = &qp->s_ack_queue[qp->s_tail_ack_queue]; + e->sent = 1; + } + bth0 = qp->s_ack_state << 24; + bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK; + break; + + default: +normal: + /* + * Send a regular ACK. + * Set the s_ack_state so we wait until after sending + * the ACK before setting s_ack_state to ACKNOWLEDGE + * (see above). + */ + qp->s_ack_state = OP(SEND_ONLY); + qp->s_flags &= ~QIB_S_ACK_PENDING; + qp->s_cur_sge = NULL; + if (qp->s_nak_state) + ohdr->u.aeth = + cpu_to_be32((qp->r_msn & QIB_MSN_MASK) | + (qp->s_nak_state << + QIB_AETH_CREDIT_SHIFT)); + else + ohdr->u.aeth = qib_compute_aeth(qp); + hwords++; + len = 0; + bth0 = OP(ACKNOWLEDGE) << 24; + bth2 = qp->s_ack_psn & QIB_PSN_MASK; + } + qp->s_rdma_ack_cnt++; + qp->s_hdrwords = hwords; + qp->s_cur_size = len; + qib_make_ruc_header(qp, ohdr, bth0, bth2); + return 1; + +bail: + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_flags &= ~(QIB_S_RESP_PENDING | QIB_S_ACK_PENDING); + return 0; +} + +/** + * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) + * @qp: a pointer to the QP + * + * Return 1 if constructed; otherwise, return 0. + */ +int qib_make_rc_req(struct qib_qp *qp) +{ + struct qib_ibdev *dev = to_idev(qp->ibqp.device); + struct qib_other_headers *ohdr; + struct qib_sge_state *ss; + struct qib_swqe *wqe; + u32 hwords; + u32 len; + u32 bth0; + u32 bth2; + u32 pmtu = qp->pmtu; + char newreq; + unsigned long flags; + int ret = 0; + int delta; + + ohdr = &qp->s_hdr.u.oth; + if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + ohdr = &qp->s_hdr.u.l.oth; + + /* + * The lock is needed to synchronize between the sending tasklet, + * the receive interrupt handler, and timeout resends. + */ + spin_lock_irqsave(&qp->s_lock, flags); + + /* Sending responses has higher priority over sending requests. */ + if ((qp->s_flags & QIB_S_RESP_PENDING) && + qib_make_rc_ack(dev, qp, ohdr, pmtu)) + goto done; + + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) { + if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= QIB_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ? + IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); + /* will get called again */ + goto done; + } + + if (qp->s_flags & (QIB_S_WAIT_RNR | QIB_S_WAIT_ACK)) + goto bail; + + if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) { + if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) { + qp->s_flags |= QIB_S_WAIT_PSN; + goto bail; + } + qp->s_sending_psn = qp->s_psn; + qp->s_sending_hpsn = qp->s_psn - 1; + } + + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ + hwords = 5; + bth0 = 0; + + /* Send a request. */ + wqe = get_swqe_ptr(qp, qp->s_cur); + switch (qp->s_state) { + default: + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK)) + goto bail; + /* + * Resend an old request or start a new one. + * + * We keep track of the current SWQE so that + * we don't reset the "furthest progress" state + * if we need to back up. + */ + newreq = 0; + if (qp->s_cur == qp->s_tail) { + /* Check if send work queue is empty. */ + if (qp->s_tail == qp->s_head) + goto bail; + /* + * If a fence is requested, wait for previous + * RDMA read and atomic operations to finish. + */ + if ((wqe->wr.send_flags & IB_SEND_FENCE) && + qp->s_num_rd_atomic) { + qp->s_flags |= QIB_S_WAIT_FENCE; + goto bail; + } + wqe->psn = qp->s_next_psn; + newreq = 1; + } + /* + * Note that we have to be careful not to modify the + * original work request since we may need to resend + * it. + */ + len = wqe->length; + ss = &qp->s_sge; + bth2 = qp->s_psn & QIB_PSN_MASK; + switch (wqe->wr.opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + /* If no credit, return. */ + if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) && + qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= QIB_S_WAIT_SSN_CREDIT; + goto bail; + } + wqe->lpsn = wqe->psn; + if (len > pmtu) { + wqe->lpsn += (len - 1) / pmtu; + qp->s_state = OP(SEND_FIRST); + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_SEND) + qp->s_state = OP(SEND_ONLY); + else { + qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + } + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + bth2 |= IB_BTH_REQ_ACK; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + break; + + case IB_WR_RDMA_WRITE: + if (newreq && !(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) + qp->s_lsn++; + /* FALLTHROUGH */ + case IB_WR_RDMA_WRITE_WITH_IMM: + /* If no credit, return. */ + if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) && + qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= QIB_S_WAIT_SSN_CREDIT; + goto bail; + } + ohdr->u.rc.reth.vaddr = + cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + ohdr->u.rc.reth.rkey = + cpu_to_be32(wqe->wr.wr.rdma.rkey); + ohdr->u.rc.reth.length = cpu_to_be32(len); + hwords += sizeof(struct ib_reth) / sizeof(u32); + wqe->lpsn = wqe->psn; + if (len > pmtu) { + wqe->lpsn += (len - 1) / pmtu; + qp->s_state = OP(RDMA_WRITE_FIRST); + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + qp->s_state = OP(RDMA_WRITE_ONLY); + else { + qp->s_state = + OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); + /* Immediate data comes after RETH */ + ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + } + bth2 |= IB_BTH_REQ_ACK; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + break; + + case IB_WR_RDMA_READ: + /* + * Don't allow more operations to be started + * than the QP limits allow. + */ + if (newreq) { + if (qp->s_num_rd_atomic >= + qp->s_max_rd_atomic) { + qp->s_flags |= QIB_S_WAIT_RDMAR; + goto bail; + } + qp->s_num_rd_atomic++; + if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) + qp->s_lsn++; + /* + * Adjust s_next_psn to count the + * expected number of responses. + */ + if (len > pmtu) + qp->s_next_psn += (len - 1) / pmtu; + wqe->lpsn = qp->s_next_psn++; + } + ohdr->u.rc.reth.vaddr = + cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + ohdr->u.rc.reth.rkey = + cpu_to_be32(wqe->wr.wr.rdma.rkey); + ohdr->u.rc.reth.length = cpu_to_be32(len); + qp->s_state = OP(RDMA_READ_REQUEST); + hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); + ss = NULL; + len = 0; + bth2 |= IB_BTH_REQ_ACK; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + break; + + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + /* + * Don't allow more operations to be started + * than the QP limits allow. + */ + if (newreq) { + if (qp->s_num_rd_atomic >= + qp->s_max_rd_atomic) { + qp->s_flags |= QIB_S_WAIT_RDMAR; + goto bail; + } + qp->s_num_rd_atomic++; + if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) + qp->s_lsn++; + wqe->lpsn = wqe->psn; + } + if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + qp->s_state = OP(COMPARE_SWAP); + ohdr->u.atomic_eth.swap_data = cpu_to_be64( + wqe->wr.wr.atomic.swap); + ohdr->u.atomic_eth.compare_data = cpu_to_be64( + wqe->wr.wr.atomic.compare_add); + } else { + qp->s_state = OP(FETCH_ADD); + ohdr->u.atomic_eth.swap_data = cpu_to_be64( + wqe->wr.wr.atomic.compare_add); + ohdr->u.atomic_eth.compare_data = 0; + } + ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( + wqe->wr.wr.atomic.remote_addr >> 32); + ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( + wqe->wr.wr.atomic.remote_addr); + ohdr->u.atomic_eth.rkey = cpu_to_be32( + wqe->wr.wr.atomic.rkey); + hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); + ss = NULL; + len = 0; + bth2 |= IB_BTH_REQ_ACK; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + break; + + default: + goto bail; + } + qp->s_sge.sge = wqe->sg_list[0]; + qp->s_sge.sg_list = wqe->sg_list + 1; + qp->s_sge.num_sge = wqe->wr.num_sge; + qp->s_sge.total_len = wqe->length; + qp->s_len = wqe->length; + if (newreq) { + qp->s_tail++; + if (qp->s_tail >= qp->s_size) + qp->s_tail = 0; + } + if (wqe->wr.opcode == IB_WR_RDMA_READ) + qp->s_psn = wqe->lpsn + 1; + else { + qp->s_psn++; + if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) + qp->s_next_psn = qp->s_psn; + } + break; + + case OP(RDMA_READ_RESPONSE_FIRST): + /* + * qp->s_state is normally set to the opcode of the + * last packet constructed for new requests and therefore + * is never set to RDMA read response. + * RDMA_READ_RESPONSE_FIRST is used by the ACK processing + * thread to indicate a SEND needs to be restarted from an + * earlier PSN without interferring with the sending thread. + * See qib_restart_rc(). + */ + qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu); + /* FALLTHROUGH */ + case OP(SEND_FIRST): + qp->s_state = OP(SEND_MIDDLE); + /* FALLTHROUGH */ + case OP(SEND_MIDDLE): + bth2 = qp->s_psn++ & QIB_PSN_MASK; + if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) + qp->s_next_psn = qp->s_psn; + ss = &qp->s_sge; + len = qp->s_len; + if (len > pmtu) { + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_SEND) + qp->s_state = OP(SEND_LAST); + else { + qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + } + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + bth2 |= IB_BTH_REQ_ACK; + qp->s_cur++; + if (qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + + case OP(RDMA_READ_RESPONSE_LAST): + /* + * qp->s_state is normally set to the opcode of the + * last packet constructed for new requests and therefore + * is never set to RDMA read response. + * RDMA_READ_RESPONSE_LAST is used by the ACK processing + * thread to indicate a RDMA write needs to be restarted from + * an earlier PSN without interferring with the sending thread. + * See qib_restart_rc(). + */ + qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu); + /* FALLTHROUGH */ + case OP(RDMA_WRITE_FIRST): + qp->s_state = OP(RDMA_WRITE_MIDDLE); + /* FALLTHROUGH */ + case OP(RDMA_WRITE_MIDDLE): + bth2 = qp->s_psn++ & QIB_PSN_MASK; + if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) + qp->s_next_psn = qp->s_psn; + ss = &qp->s_sge; + len = qp->s_len; + if (len > pmtu) { + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + qp->s_state = OP(RDMA_WRITE_LAST); + else { + qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + } + bth2 |= IB_BTH_REQ_ACK; + qp->s_cur++; + if (qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + + case OP(RDMA_READ_RESPONSE_MIDDLE): + /* + * qp->s_state is normally set to the opcode of the + * last packet constructed for new requests and therefore + * is never set to RDMA read response. + * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing + * thread to indicate a RDMA read needs to be restarted from + * an earlier PSN without interferring with the sending thread. + * See qib_restart_rc(). + */ + len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu; + ohdr->u.rc.reth.vaddr = + cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); + ohdr->u.rc.reth.rkey = + cpu_to_be32(wqe->wr.wr.rdma.rkey); + ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len); + qp->s_state = OP(RDMA_READ_REQUEST); + hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); + bth2 = (qp->s_psn & QIB_PSN_MASK) | IB_BTH_REQ_ACK; + qp->s_psn = wqe->lpsn + 1; + ss = NULL; + len = 0; + qp->s_cur++; + if (qp->s_cur == qp->s_size) + qp->s_cur = 0; + break; + } + qp->s_sending_hpsn = bth2; + delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8; + if (delta && delta % QIB_PSN_CREDIT == 0) + bth2 |= IB_BTH_REQ_ACK; + if (qp->s_flags & QIB_S_SEND_ONE) { + qp->s_flags &= ~QIB_S_SEND_ONE; + qp->s_flags |= QIB_S_WAIT_ACK; + bth2 |= IB_BTH_REQ_ACK; + } + qp->s_len -= len; + qp->s_hdrwords = hwords; + qp->s_cur_sge = ss; + qp->s_cur_size = len; + qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2); +done: + ret = 1; + goto unlock; + +bail: + qp->s_flags &= ~QIB_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); + return ret; +} + +/** + * qib_send_rc_ack - Construct an ACK packet and send it + * @qp: a pointer to the QP + * + * This is called from qib_rc_rcv() and qib_kreceive(). + * Note that RDMA reads and atomics are handled in the + * send side QP state and tasklet. + */ +void qib_send_rc_ack(struct qib_qp *qp) +{ + struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + u64 pbc; + u16 lrh0; + u32 bth0; + u32 hwords; + u32 pbufn; + u32 __iomem *piobuf; + struct qib_ib_header hdr; + struct qib_other_headers *ohdr; + u32 control; + unsigned long flags; + + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) + goto unlock; + + /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ + if ((qp->s_flags & QIB_S_RESP_PENDING) || qp->s_rdma_ack_cnt) + goto queue_ack; + + /* Construct the header with s_lock held so APM doesn't change it. */ + ohdr = &hdr.u.oth; + lrh0 = QIB_LRH_BTH; + /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */ + hwords = 6; + if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { + hwords += qib_make_grh(ibp, &hdr.u.l.grh, + &qp->remote_ah_attr.grh, hwords, 0); + ohdr = &hdr.u.l.oth; + lrh0 = QIB_LRH_GRH; + } + /* read pkey_index w/o lock (its atomic) */ + bth0 = qib_get_pkey(ibp, qp->s_pkey_index) | (OP(ACKNOWLEDGE) << 24); + if (qp->s_mig_state == IB_MIG_MIGRATED) + bth0 |= IB_BTH_MIG_REQ; + if (qp->r_nak_state) + ohdr->u.aeth = cpu_to_be32((qp->r_msn & QIB_MSN_MASK) | + (qp->r_nak_state << + QIB_AETH_CREDIT_SHIFT)); + else + ohdr->u.aeth = qib_compute_aeth(qp); + lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 | + qp->remote_ah_attr.sl << 4; + hdr.lrh[0] = cpu_to_be16(lrh0); + hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); + hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits); + ohdr->bth[0] = cpu_to_be32(bth0); + ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); + ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & QIB_PSN_MASK); + + spin_unlock_irqrestore(&qp->s_lock, flags); + + /* Don't try to send ACKs if the link isn't ACTIVE */ + if (!(ppd->lflags & QIBL_LINKACTIVE)) + goto done; + + control = dd->f_setpbc_control(ppd, hwords + SIZE_OF_CRC, + qp->s_srate, lrh0 >> 12); + /* length is + 1 for the control dword */ + pbc = ((u64) control << 32) | (hwords + 1); + + piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn); + if (!piobuf) { + /* + * We are out of PIO buffers at the moment. + * Pass responsibility for sending the ACK to the + * send tasklet so that when a PIO buffer becomes + * available, the ACK is sent ahead of other outgoing + * packets. + */ + spin_lock_irqsave(&qp->s_lock, flags); + goto queue_ack; + } + + /* + * Write the pbc. + * We have to flush after the PBC for correctness + * on some cpus or WC buffer can be written out of order. + */ + writeq(pbc, piobuf); + + if (dd->flags & QIB_PIO_FLUSH_WC) { + u32 *hdrp = (u32 *) &hdr; + + qib_flush_wc(); + qib_pio_copy(piobuf + 2, hdrp, hwords - 1); + qib_flush_wc(); + __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1); + } else + qib_pio_copy(piobuf + 2, (u32 *) &hdr, hwords); + + if (dd->flags & QIB_USE_SPCL_TRIG) { + u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023; + + qib_flush_wc(); + __raw_writel(0xaebecede, piobuf + spcl_off); + } + + qib_flush_wc(); + qib_sendbuf_done(dd, pbufn); + + ibp->n_unicast_xmit++; + goto done; + +queue_ack: + if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { + ibp->n_rc_qacks++; + qp->s_flags |= QIB_S_ACK_PENDING | QIB_S_RESP_PENDING; + qp->s_nak_state = qp->r_nak_state; + qp->s_ack_psn = qp->r_ack_psn; + + /* Schedule the send tasklet. */ + qib_schedule_send(qp); + } +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); +done: + return; +} + +/** + * reset_psn - reset the QP state to send starting from PSN + * @qp: the QP + * @psn: the packet sequence number to restart at + * + * This is called from qib_rc_rcv() to process an incoming RC ACK + * for the given QP. + * Called at interrupt level with the QP s_lock held. + */ +static void reset_psn(struct qib_qp *qp, u32 psn) +{ + u32 n = qp->s_acked; + struct qib_swqe *wqe = get_swqe_ptr(qp, n); + u32 opcode; + + qp->s_cur = n; + + /* + * If we are starting the request from the beginning, + * let the normal send code handle initialization. + */ + if (qib_cmp24(psn, wqe->psn) <= 0) { + qp->s_state = OP(SEND_LAST); + goto done; + } + + /* Find the work request opcode corresponding to the given PSN. */ + opcode = wqe->wr.opcode; + for (;;) { + int diff; + + if (++n == qp->s_size) + n = 0; + if (n == qp->s_tail) + break; + wqe = get_swqe_ptr(qp, n); + diff = qib_cmp24(psn, wqe->psn); + if (diff < 0) + break; + qp->s_cur = n; + /* + * If we are starting the request from the beginning, + * let the normal send code handle initialization. + */ + if (diff == 0) { + qp->s_state = OP(SEND_LAST); + goto done; + } + opcode = wqe->wr.opcode; + } + + /* + * Set the state to restart in the middle of a request. + * Don't change the s_sge, s_cur_sge, or s_cur_size. + * See qib_make_rc_req(). + */ + switch (opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + qp->s_state = OP(RDMA_READ_RESPONSE_FIRST); + break; + + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + qp->s_state = OP(RDMA_READ_RESPONSE_LAST); + break; + + case IB_WR_RDMA_READ: + qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE); + break; + + default: + /* + * This case shouldn't happen since its only + * one PSN per req. + */ + qp->s_state = OP(SEND_LAST); + } +done: + qp->s_psn = psn; + /* + * Set QIB_S_WAIT_PSN as qib_rc_complete() may start the timer + * asynchronously before the send tasklet can get scheduled. + * Doing it in qib_make_rc_req() is too late. + */ + if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) && + (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) + qp->s_flags |= QIB_S_WAIT_PSN; +} + +/* + * Back up requester to resend the last un-ACKed request. + * The QP r_lock and s_lock should be held and interrupts disabled. + */ +static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait) +{ + struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_acked); + struct qib_ibport *ibp; + + if (qp->s_retry == 0) { + if (qp->s_mig_state == IB_MIG_ARMED) { + qib_migrate_qp(qp); + qp->s_retry = qp->s_retry_cnt; + } else if (qp->s_last == qp->s_acked) { + qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); + qib_error_qp(qp, IB_WC_WR_FLUSH_ERR); + return; + } else /* XXX need to handle delayed completion */ + return; + } else + qp->s_retry--; + + ibp = to_iport(qp->ibqp.device, qp->port_num); + if (wqe->wr.opcode == IB_WR_RDMA_READ) + ibp->n_rc_resends++; + else + ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK; + + qp->s_flags &= ~(QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR | + QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_PSN | + QIB_S_WAIT_ACK); + if (wait) + qp->s_flags |= QIB_S_SEND_ONE; + reset_psn(qp, psn); +} + +/* + * This is called from s_timer for missing responses. + */ +static void rc_timeout(unsigned long arg) +{ + struct qib_qp *qp = (struct qib_qp *)arg; + struct qib_ibport *ibp; + unsigned long flags; + + spin_lock_irqsave(&qp->r_lock, flags); + spin_lock(&qp->s_lock); + if (qp->s_flags & QIB_S_TIMER) { + ibp = to_iport(qp->ibqp.device, qp->port_num); + ibp->n_rc_timeouts++; + qp->s_flags &= ~QIB_S_TIMER; + del_timer(&qp->s_timer); + qib_restart_rc(qp, qp->s_last_psn + 1, 1); + qib_schedule_send(qp); + } + spin_unlock(&qp->s_lock); + spin_unlock_irqrestore(&qp->r_lock, flags); +} + +/* + * This is called from s_timer for RNR timeouts. + */ +void qib_rc_rnr_retry(unsigned long arg) +{ + struct qib_qp *qp = (struct qib_qp *)arg; + unsigned long flags; + + spin_lock_irqsave(&qp->s_lock, flags); + if (qp->s_flags & QIB_S_WAIT_RNR) { + qp->s_flags &= ~QIB_S_WAIT_RNR; + del_timer(&qp->s_timer); + qib_schedule_send(qp); + } + spin_unlock_irqrestore(&qp->s_lock, flags); +} + +/* + * Set qp->s_sending_psn to the next PSN after the given one. + * This would be psn+1 except when RDMA reads are present. + */ +static void reset_sending_psn(struct qib_qp *qp, u32 psn) +{ + struct qib_swqe *wqe; + u32 n = qp->s_last; + + /* Find the work request corresponding to the given PSN. */ + for (;;) { + wqe = get_swqe_ptr(qp, n); + if (qib_cmp24(psn, wqe->lpsn) <= 0) { + if (wqe->wr.opcode == IB_WR_RDMA_READ) + qp->s_sending_psn = wqe->lpsn + 1; + else + qp->s_sending_psn = psn + 1; + break; + } + if (++n == qp->s_size) + n = 0; + if (n == qp->s_tail) + break; + } +} + +/* + * This should be called with the QP s_lock held and interrupts disabled. + */ +void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr) +{ + struct qib_other_headers *ohdr; + struct qib_swqe *wqe; + struct ib_wc wc; + unsigned i; + u32 opcode; + u32 psn; + + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND)) + return; + + /* Find out where the BTH is */ + if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH) + ohdr = &hdr->u.oth; + else + ohdr = &hdr->u.l.oth; + + opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && + opcode <= OP(ATOMIC_ACKNOWLEDGE)) { + WARN_ON(!qp->s_rdma_ack_cnt); + qp->s_rdma_ack_cnt--; + return; + } + + psn = be32_to_cpu(ohdr->bth[2]); + reset_sending_psn(qp, psn); + + /* + * Start timer after a packet requesting an ACK has been sent and + * there are still requests that haven't been acked. + */ + if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail && + !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)) && + (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) + start_timer(qp); + + while (qp->s_last != qp->s_acked) { + wqe = get_swqe_ptr(qp, qp->s_last); + if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 && + qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) + break; + for (i = 0; i < wqe->wr.num_sge; i++) { + struct qib_sge *sge = &wqe->sg_list[i]; + + atomic_dec(&sge->mr->refcount); + } + /* Post a send completion queue entry if requested. */ + if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED)) { + memset(&wc, 0, sizeof wc); + wc.wr_id = wqe->wr.wr_id; + wc.status = IB_WC_SUCCESS; + wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; + wc.byte_len = wqe->length; + wc.qp = &qp->ibqp; + qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); + } + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + } + /* + * If we were waiting for sends to complete before resending, + * and they are now complete, restart sending. + */ + if (qp->s_flags & QIB_S_WAIT_PSN && + qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { + qp->s_flags &= ~QIB_S_WAIT_PSN; + qp->s_sending_psn = qp->s_psn; + qp->s_sending_hpsn = qp->s_psn - 1; + qib_schedule_send(qp); + } +} + +static inline void update_last_psn(struct qib_qp *qp, u32 psn) +{ + qp->s_last_psn = psn; +} + +/* + * Generate a SWQE completion. + * This is similar to qib_send_complete but has to check to be sure + * that the SGEs are not being referenced if the SWQE is being resent. + */ +static struct qib_swqe *do_rc_completion(struct qib_qp *qp, + struct qib_swqe *wqe, + struct qib_ibport *ibp) +{ + struct ib_wc wc; + unsigned i; + + /* + * Don't decrement refcount and don't generate a + * completion if the SWQE is being resent until the send + * is finished. + */ + if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 || + qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { + for (i = 0; i < wqe->wr.num_sge; i++) { + struct qib_sge *sge = &wqe->sg_list[i]; + + atomic_dec(&sge->mr->refcount); + } + /* Post a send completion queue entry if requested. */ + if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED)) { + memset(&wc, 0, sizeof wc); + wc.wr_id = wqe->wr.wr_id; + wc.status = IB_WC_SUCCESS; + wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; + wc.byte_len = wqe->length; + wc.qp = &qp->ibqp; + qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); + } + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + } else + ibp->n_rc_delayed_comp++; + + qp->s_retry = qp->s_retry_cnt; + update_last_psn(qp, wqe->lpsn); + + /* + * If we are completing a request which is in the process of + * being resent, we can stop resending it since we know the + * responder has already seen it. + */ + if (qp->s_acked == qp->s_cur) { + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; + qp->s_acked = qp->s_cur; + wqe = get_swqe_ptr(qp, qp->s_cur); + if (qp->s_acked != qp->s_tail) { + qp->s_state = OP(SEND_LAST); + qp->s_psn = wqe->psn; + } + } else { + if (++qp->s_acked >= qp->s_size) + qp->s_acked = 0; + if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur) + qp->s_draining = 0; + wqe = get_swqe_ptr(qp, qp->s_acked); + } + return wqe; +} + +/** + * do_rc_ack - process an incoming RC ACK + * @qp: the QP the ACK came in on + * @psn: the packet sequence number of the ACK + * @opcode: the opcode of the request that resulted in the ACK + * + * This is called from qib_rc_rcv_resp() to process an incoming RC ACK + * for the given QP. + * Called at interrupt level with the QP s_lock held. + * Returns 1 if OK, 0 if current operation should be aborted (NAK). + */ +static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode, + u64 val, struct qib_ctxtdata *rcd) +{ + struct qib_ibport *ibp; + enum ib_wc_status status; + struct qib_swqe *wqe; + int ret = 0; + u32 ack_psn; + int diff; + + /* Remove QP from retry timer */ + if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) { + qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR); + del_timer(&qp->s_timer); + } + + /* + * Note that NAKs implicitly ACK outstanding SEND and RDMA write + * requests and implicitly NAK RDMA read and atomic requests issued + * before the NAK'ed request. The MSN won't include the NAK'ed + * request but will include an ACK'ed request(s). + */ + ack_psn = psn; + if (aeth >> 29) + ack_psn--; + wqe = get_swqe_ptr(qp, qp->s_acked); + ibp = to_iport(qp->ibqp.device, qp->port_num); + + /* + * The MSN might be for a later WQE than the PSN indicates so + * only complete WQEs that the PSN finishes. + */ + while ((diff = qib_cmp24(ack_psn, wqe->lpsn)) >= 0) { + /* + * RDMA_READ_RESPONSE_ONLY is a special case since + * we want to generate completion events for everything + * before the RDMA read, copy the data, then generate + * the completion for the read. + */ + if (wqe->wr.opcode == IB_WR_RDMA_READ && + opcode == OP(RDMA_READ_RESPONSE_ONLY) && + diff == 0) { + ret = 1; + goto bail; + } + /* + * If this request is a RDMA read or atomic, and the ACK is + * for a later operation, this ACK NAKs the RDMA read or + * atomic. In other words, only a RDMA_READ_LAST or ONLY + * can ACK a RDMA read and likewise for atomic ops. Note + * that the NAK case can only happen if relaxed ordering is + * used and requests are sent after an RDMA read or atomic + * is sent but before the response is received. + */ + if ((wqe->wr.opcode == IB_WR_RDMA_READ && + (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) || + ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && + (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) { + /* Retry this request. */ + if (!(qp->r_flags & QIB_R_RDMAR_SEQ)) { + qp->r_flags |= QIB_R_RDMAR_SEQ; + qib_restart_rc(qp, qp->s_last_psn + 1, 0); + if (list_empty(&qp->rspwait)) { + qp->r_flags |= QIB_R_RSP_SEND; + atomic_inc(&qp->refcount); + list_add_tail(&qp->rspwait, + &rcd->qp_wait_list); + } + } + /* + * No need to process the ACK/NAK since we are + * restarting an earlier request. + */ + goto bail; + } + if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { + u64 *vaddr = wqe->sg_list[0].vaddr; + *vaddr = val; + } + if (qp->s_num_rd_atomic && + (wqe->wr.opcode == IB_WR_RDMA_READ || + wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { + qp->s_num_rd_atomic--; + /* Restart sending task if fence is complete */ + if ((qp->s_flags & QIB_S_WAIT_FENCE) && + !qp->s_num_rd_atomic) { + qp->s_flags &= ~(QIB_S_WAIT_FENCE | + QIB_S_WAIT_ACK); + qib_schedule_send(qp); + } else if (qp->s_flags & QIB_S_WAIT_RDMAR) { + qp->s_flags &= ~(QIB_S_WAIT_RDMAR | + QIB_S_WAIT_ACK); + qib_schedule_send(qp); + } + } + wqe = do_rc_completion(qp, wqe, ibp); + if (qp->s_acked == qp->s_tail) + break; + } + + switch (aeth >> 29) { + case 0: /* ACK */ + ibp->n_rc_acks++; + if (qp->s_acked != qp->s_tail) { + /* + * We are expecting more ACKs so + * reset the retransmit timer. + */ + start_timer(qp); + /* + * We can stop resending the earlier packets and + * continue with the next packet the receiver wants. + */ + if (qib_cmp24(qp->s_psn, psn) <= 0) + reset_psn(qp, psn + 1); + } else if (qib_cmp24(qp->s_psn, psn) <= 0) { + qp->s_state = OP(SEND_LAST); + qp->s_psn = psn + 1; + } + if (qp->s_flags & QIB_S_WAIT_ACK) { + qp->s_flags &= ~QIB_S_WAIT_ACK; + qib_schedule_send(qp); + } + qib_get_credit(qp, aeth); + qp->s_rnr_retry = qp->s_rnr_retry_cnt; + qp->s_retry = qp->s_retry_cnt; + update_last_psn(qp, psn); + ret = 1; + goto bail; + + case 1: /* RNR NAK */ + ibp->n_rnr_naks++; + if (qp->s_acked == qp->s_tail) + goto bail; + if (qp->s_flags & QIB_S_WAIT_RNR) + goto bail; + if (qp->s_rnr_retry == 0) { + status = IB_WC_RNR_RETRY_EXC_ERR; + goto class_b; + } + if (qp->s_rnr_retry_cnt < 7) + qp->s_rnr_retry--; + + /* The last valid PSN is the previous PSN. */ + update_last_psn(qp, psn - 1); + + ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK; + + reset_psn(qp, psn); + + qp->s_flags &= ~(QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_ACK); + qp->s_flags |= QIB_S_WAIT_RNR; + qp->s_timer.function = qib_rc_rnr_retry; + qp->s_timer.expires = jiffies + usecs_to_jiffies( + ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) & + QIB_AETH_CREDIT_MASK]); + add_timer(&qp->s_timer); + goto bail; + + case 3: /* NAK */ + if (qp->s_acked == qp->s_tail) + goto bail; + /* The last valid PSN is the previous PSN. */ + update_last_psn(qp, psn - 1); + switch ((aeth >> QIB_AETH_CREDIT_SHIFT) & + QIB_AETH_CREDIT_MASK) { + case 0: /* PSN sequence error */ + ibp->n_seq_naks++; + /* + * Back up to the responder's expected PSN. + * Note that we might get a NAK in the middle of an + * RDMA READ response which terminates the RDMA + * READ. + */ + qib_restart_rc(qp, psn, 0); + qib_schedule_send(qp); + break; + + case 1: /* Invalid Request */ + status = IB_WC_REM_INV_REQ_ERR; + ibp->n_other_naks++; + goto class_b; + + case 2: /* Remote Access Error */ + status = IB_WC_REM_ACCESS_ERR; + ibp->n_other_naks++; + goto class_b; + + case 3: /* Remote Operation Error */ + status = IB_WC_REM_OP_ERR; + ibp->n_other_naks++; +class_b: + if (qp->s_last == qp->s_acked) { + qib_send_complete(qp, wqe, status); + qib_error_qp(qp, IB_WC_WR_FLUSH_ERR); + } + break; + + default: + /* Ignore other reserved NAK error codes */ + goto reserved; + } + qp->s_retry = qp->s_retry_cnt; + qp->s_rnr_retry = qp->s_rnr_retry_cnt; + goto bail; + + default: /* 2: reserved */ +reserved: + /* Ignore reserved NAK codes. */ + goto bail; + } + +bail: + return ret; +} + +/* + * We have seen an out of sequence RDMA read middle or last packet. + * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE. + */ +static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn, + struct qib_ctxtdata *rcd) +{ + struct qib_swqe *wqe; + + /* Remove QP from retry timer */ + if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) { + qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR); + del_timer(&qp->s_timer); + } + + wqe = get_swqe_ptr(qp, qp->s_acked); + + while (qib_cmp24(psn, wqe->lpsn) > 0) { + if (wqe->wr.opcode == IB_WR_RDMA_READ || + wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + break; + wqe = do_rc_completion(qp, wqe, ibp); + } + + ibp->n_rdma_seq++; + qp->r_flags |= QIB_R_RDMAR_SEQ; + qib_restart_rc(qp, qp->s_last_psn + 1, 0); + if (list_empty(&qp->rspwait)) { + qp->r_flags |= QIB_R_RSP_SEND; + atomic_inc(&qp->refcount); + list_add_tail(&qp->rspwait, &rcd->qp_wait_list); + } +} + +/** + * qib_rc_rcv_resp - process an incoming RC response packet + * @ibp: the port this packet came in on + * @ohdr: the other headers for this packet + * @data: the packet data + * @tlen: the packet length + * @qp: the QP for this packet + * @opcode: the opcode for this packet + * @psn: the packet sequence number for this packet + * @hdrsize: the header length + * @pmtu: the path MTU + * + * This is called from qib_rc_rcv() to process an incoming RC response + * packet for the given QP. + * Called at interrupt level. + */ +static void qib_rc_rcv_resp(struct qib_ibport *ibp, + struct qib_other_headers *ohdr, + void *data, u32 tlen, + struct qib_qp *qp, + u32 opcode, + u32 psn, u32 hdrsize, u32 pmtu, + struct qib_ctxtdata *rcd) +{ + struct qib_swqe *wqe; + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + enum ib_wc_status status; + unsigned long flags; + int diff; + u32 pad; + u32 aeth; + u64 val; + + if (opcode != OP(RDMA_READ_RESPONSE_MIDDLE)) { + /* + * If ACK'd PSN on SDMA busy list try to make progress to + * reclaim SDMA credits. + */ + if ((qib_cmp24(psn, qp->s_sending_psn) >= 0) && + (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) { + + /* + * If send tasklet not running attempt to progress + * SDMA queue. + */ + if (!(qp->s_flags & QIB_S_BUSY)) { + /* Acquire SDMA Lock */ + spin_lock_irqsave(&ppd->sdma_lock, flags); + /* Invoke sdma make progress */ + qib_sdma_make_progress(ppd); + /* Release SDMA Lock */ + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } + } + } + + spin_lock_irqsave(&qp->s_lock, flags); + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) + goto ack_done; + + /* Ignore invalid responses. */ + if (qib_cmp24(psn, qp->s_next_psn) >= 0) + goto ack_done; + + /* Ignore duplicate responses. */ + diff = qib_cmp24(psn, qp->s_last_psn); + if (unlikely(diff <= 0)) { + /* Update credits for "ghost" ACKs */ + if (diff == 0 && opcode == OP(ACKNOWLEDGE)) { + aeth = be32_to_cpu(ohdr->u.aeth); + if ((aeth >> 29) == 0) + qib_get_credit(qp, aeth); + } + goto ack_done; + } + + /* + * Skip everything other than the PSN we expect, if we are waiting + * for a reply to a restarted RDMA read or atomic op. + */ + if (qp->r_flags & QIB_R_RDMAR_SEQ) { + if (qib_cmp24(psn, qp->s_last_psn + 1) != 0) + goto ack_done; + qp->r_flags &= ~QIB_R_RDMAR_SEQ; + } + + if (unlikely(qp->s_acked == qp->s_tail)) + goto ack_done; + wqe = get_swqe_ptr(qp, qp->s_acked); + status = IB_WC_SUCCESS; + + switch (opcode) { + case OP(ACKNOWLEDGE): + case OP(ATOMIC_ACKNOWLEDGE): + case OP(RDMA_READ_RESPONSE_FIRST): + aeth = be32_to_cpu(ohdr->u.aeth); + if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { + __be32 *p = ohdr->u.at.atomic_ack_eth; + + val = ((u64) be32_to_cpu(p[0]) << 32) | + be32_to_cpu(p[1]); + } else + val = 0; + if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) || + opcode != OP(RDMA_READ_RESPONSE_FIRST)) + goto ack_done; + hdrsize += 4; + wqe = get_swqe_ptr(qp, qp->s_acked); + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + /* + * If this is a response to a resent RDMA read, we + * have to be careful to copy the data to the right + * location. + */ + qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, + wqe, psn, pmtu); + goto read_middle; + + case OP(RDMA_READ_RESPONSE_MIDDLE): + /* no AETH, no ACK */ + if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1))) + goto ack_seq_err; + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; +read_middle: + if (unlikely(tlen != (hdrsize + pmtu + 4))) + goto ack_len_err; + if (unlikely(pmtu >= qp->s_rdma_read_len)) + goto ack_len_err; + + /* + * We got a response so update the timeout. + * 4.096 usec. * (1 << qp->timeout) + */ + qp->s_flags |= QIB_S_TIMER; + mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies); + if (qp->s_flags & QIB_S_WAIT_ACK) { + qp->s_flags &= ~QIB_S_WAIT_ACK; + qib_schedule_send(qp); + } + + if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE)) + qp->s_retry = qp->s_retry_cnt; + + /* + * Update the RDMA receive state but do the copy w/o + * holding the locks and blocking interrupts. + */ + qp->s_rdma_read_len -= pmtu; + update_last_psn(qp, psn); + spin_unlock_irqrestore(&qp->s_lock, flags); + qib_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0); + goto bail; + + case OP(RDMA_READ_RESPONSE_ONLY): + aeth = be32_to_cpu(ohdr->u.aeth); + if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd)) + goto ack_done; + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* + * Check that the data size is >= 0 && <= pmtu. + * Remember to account for the AETH header (4) and + * ICRC (4). + */ + if (unlikely(tlen < (hdrsize + pad + 8))) + goto ack_len_err; + /* + * If this is a response to a resent RDMA read, we + * have to be careful to copy the data to the right + * location. + */ + wqe = get_swqe_ptr(qp, qp->s_acked); + qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, + wqe, psn, pmtu); + goto read_last; + + case OP(RDMA_READ_RESPONSE_LAST): + /* ACKs READ req. */ + if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1))) + goto ack_seq_err; + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* + * Check that the data size is >= 1 && <= pmtu. + * Remember to account for the AETH header (4) and + * ICRC (4). + */ + if (unlikely(tlen <= (hdrsize + pad + 8))) + goto ack_len_err; +read_last: + tlen -= hdrsize + pad + 8; + if (unlikely(tlen != qp->s_rdma_read_len)) + goto ack_len_err; + aeth = be32_to_cpu(ohdr->u.aeth); + qib_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0); + WARN_ON(qp->s_rdma_read_sge.num_sge); + (void) do_rc_ack(qp, aeth, psn, + OP(RDMA_READ_RESPONSE_LAST), 0, rcd); + goto ack_done; + } + +ack_op_err: + status = IB_WC_LOC_QP_OP_ERR; + goto ack_err; + +ack_seq_err: + rdma_seq_err(qp, ibp, psn, rcd); + goto ack_done; + +ack_len_err: + status = IB_WC_LOC_LEN_ERR; +ack_err: + if (qp->s_last == qp->s_acked) { + qib_send_complete(qp, wqe, status); + qib_error_qp(qp, IB_WC_WR_FLUSH_ERR); + } +ack_done: + spin_unlock_irqrestore(&qp->s_lock, flags); +bail: + return; +} + +/** + * qib_rc_rcv_error - process an incoming duplicate or error RC packet + * @ohdr: the other headers for this packet + * @data: the packet data + * @qp: the QP for this packet + * @opcode: the opcode for this packet + * @psn: the packet sequence number for this packet + * @diff: the difference between the PSN and the expected PSN + * + * This is called from qib_rc_rcv() to process an unexpected + * incoming RC packet for the given QP. + * Called at interrupt level. + * Return 1 if no more processing is needed; otherwise return 0 to + * schedule a response to be sent. + */ +static int qib_rc_rcv_error(struct qib_other_headers *ohdr, + void *data, + struct qib_qp *qp, + u32 opcode, + u32 psn, + int diff, + struct qib_ctxtdata *rcd) +{ + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct qib_ack_entry *e; + unsigned long flags; + u8 i, prev; + int old_req; + + if (diff > 0) { + /* + * Packet sequence error. + * A NAK will ACK earlier sends and RDMA writes. + * Don't queue the NAK if we already sent one. + */ + if (!qp->r_nak_state) { + ibp->n_rc_seqnak++; + qp->r_nak_state = IB_NAK_PSN_ERROR; + /* Use the expected PSN. */ + qp->r_ack_psn = qp->r_psn; + /* + * Wait to send the sequence NAK until all packets + * in the receive queue have been processed. + * Otherwise, we end up propagating congestion. + */ + if (list_empty(&qp->rspwait)) { + qp->r_flags |= QIB_R_RSP_NAK; + atomic_inc(&qp->refcount); + list_add_tail(&qp->rspwait, &rcd->qp_wait_list); + } + } + goto done; + } + + /* + * Handle a duplicate request. Don't re-execute SEND, RDMA + * write or atomic op. Don't NAK errors, just silently drop + * the duplicate request. Note that r_sge, r_len, and + * r_rcv_len may be in use so don't modify them. + * + * We are supposed to ACK the earliest duplicate PSN but we + * can coalesce an outstanding duplicate ACK. We have to + * send the earliest so that RDMA reads can be restarted at + * the requester's expected PSN. + * + * First, find where this duplicate PSN falls within the + * ACKs previously sent. + * old_req is true if there is an older response that is scheduled + * to be sent before sending this one. + */ + e = NULL; + old_req = 1; + ibp->n_rc_dupreq++; + + spin_lock_irqsave(&qp->s_lock, flags); + + for (i = qp->r_head_ack_queue; ; i = prev) { + if (i == qp->s_tail_ack_queue) + old_req = 0; + if (i) + prev = i - 1; + else + prev = QIB_MAX_RDMA_ATOMIC; + if (prev == qp->r_head_ack_queue) { + e = NULL; + break; + } + e = &qp->s_ack_queue[prev]; + if (!e->opcode) { + e = NULL; + break; + } + if (qib_cmp24(psn, e->psn) >= 0) { + if (prev == qp->s_tail_ack_queue && + qib_cmp24(psn, e->lpsn) <= 0) + old_req = 0; + break; + } + } + switch (opcode) { + case OP(RDMA_READ_REQUEST): { + struct ib_reth *reth; + u32 offset; + u32 len; + + /* + * If we didn't find the RDMA read request in the ack queue, + * we can ignore this request. + */ + if (!e || e->opcode != OP(RDMA_READ_REQUEST)) + goto unlock_done; + /* RETH comes after BTH */ + reth = &ohdr->u.rc.reth; + /* + * Address range must be a subset of the original + * request and start on pmtu boundaries. + * We reuse the old ack_queue slot since the requester + * should not back up and request an earlier PSN for the + * same request. + */ + offset = ((psn - e->psn) & QIB_PSN_MASK) * + qp->pmtu; + len = be32_to_cpu(reth->length); + if (unlikely(offset + len != e->rdma_sge.sge_length)) + goto unlock_done; + if (e->rdma_sge.mr) { + atomic_dec(&e->rdma_sge.mr->refcount); + e->rdma_sge.mr = NULL; + } + if (len != 0) { + u32 rkey = be32_to_cpu(reth->rkey); + u64 vaddr = be64_to_cpu(reth->vaddr); + int ok; + + ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, + IB_ACCESS_REMOTE_READ); + if (unlikely(!ok)) + goto unlock_done; + } else { + e->rdma_sge.vaddr = NULL; + e->rdma_sge.length = 0; + e->rdma_sge.sge_length = 0; + } + e->psn = psn; + if (old_req) + goto unlock_done; + qp->s_tail_ack_queue = prev; + break; + } + + case OP(COMPARE_SWAP): + case OP(FETCH_ADD): { + /* + * If we didn't find the atomic request in the ack queue + * or the send tasklet is already backed up to send an + * earlier entry, we can ignore this request. + */ + if (!e || e->opcode != (u8) opcode || old_req) + goto unlock_done; + qp->s_tail_ack_queue = prev; + break; + } + + default: + /* + * Ignore this operation if it doesn't request an ACK + * or an earlier RDMA read or atomic is going to be resent. + */ + if (!(psn & IB_BTH_REQ_ACK) || old_req) + goto unlock_done; + /* + * Resend the most recent ACK if this request is + * after all the previous RDMA reads and atomics. + */ + if (i == qp->r_head_ack_queue) { + spin_unlock_irqrestore(&qp->s_lock, flags); + qp->r_nak_state = 0; + qp->r_ack_psn = qp->r_psn - 1; + goto send_ack; + } + /* + * Try to send a simple ACK to work around a Mellanox bug + * which doesn't accept a RDMA read response or atomic + * response as an ACK for earlier SENDs or RDMA writes. + */ + if (!(qp->s_flags & QIB_S_RESP_PENDING)) { + spin_unlock_irqrestore(&qp->s_lock, flags); + qp->r_nak_state = 0; + qp->r_ack_psn = qp->s_ack_queue[i].psn - 1; + goto send_ack; + } + /* + * Resend the RDMA read or atomic op which + * ACKs this duplicate request. + */ + qp->s_tail_ack_queue = i; + break; + } + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_flags |= QIB_S_RESP_PENDING; + qp->r_nak_state = 0; + qib_schedule_send(qp); + +unlock_done: + spin_unlock_irqrestore(&qp->s_lock, flags); +done: + return 1; + +send_ack: + return 0; +} + +void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err) +{ + unsigned long flags; + int lastwqe; + + spin_lock_irqsave(&qp->s_lock, flags); + lastwqe = qib_error_qp(qp, err); + spin_unlock_irqrestore(&qp->s_lock, flags); + + if (lastwqe) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } +} + +static inline void qib_update_ack_queue(struct qib_qp *qp, unsigned n) +{ + unsigned next; + + next = n + 1; + if (next > QIB_MAX_RDMA_ATOMIC) + next = 0; + qp->s_tail_ack_queue = next; + qp->s_ack_state = OP(ACKNOWLEDGE); +} + +/** + * qib_rc_rcv - process an incoming RC packet + * @rcd: the context pointer + * @hdr: the header of this packet + * @has_grh: true if the header has a GRH + * @data: the packet data + * @tlen: the packet length + * @qp: the QP for this packet + * + * This is called from qib_qp_rcv() to process an incoming RC packet + * for the given QP. + * Called at interrupt level. + */ +void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct qib_qp *qp) +{ + struct qib_ibport *ibp = &rcd->ppd->ibport_data; + struct qib_other_headers *ohdr; + u32 opcode; + u32 hdrsize; + u32 psn; + u32 pad; + struct ib_wc wc; + u32 pmtu = qp->pmtu; + int diff; + struct ib_reth *reth; + unsigned long flags; + int ret; + + /* Check for GRH */ + if (!has_grh) { + ohdr = &hdr->u.oth; + hdrsize = 8 + 12; /* LRH + BTH */ + } else { + ohdr = &hdr->u.l.oth; + hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ + } + + opcode = be32_to_cpu(ohdr->bth[0]); + if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode)) + return; + + psn = be32_to_cpu(ohdr->bth[2]); + opcode >>= 24; + + /* + * Process responses (ACKs) before anything else. Note that the + * packet sequence number will be for something in the send work + * queue rather than the expected receive packet sequence number. + * In other words, this QP is the requester. + */ + if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && + opcode <= OP(ATOMIC_ACKNOWLEDGE)) { + qib_rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn, + hdrsize, pmtu, rcd); + return; + } + + /* Compute 24 bits worth of difference. */ + diff = qib_cmp24(psn, qp->r_psn); + if (unlikely(diff)) { + if (qib_rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd)) + return; + goto send_ack; + } + + /* Check for opcode sequence errors. */ + switch (qp->r_state) { + case OP(SEND_FIRST): + case OP(SEND_MIDDLE): + if (opcode == OP(SEND_MIDDLE) || + opcode == OP(SEND_LAST) || + opcode == OP(SEND_LAST_WITH_IMMEDIATE)) + break; + goto nack_inv; + + case OP(RDMA_WRITE_FIRST): + case OP(RDMA_WRITE_MIDDLE): + if (opcode == OP(RDMA_WRITE_MIDDLE) || + opcode == OP(RDMA_WRITE_LAST) || + opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) + break; + goto nack_inv; + + default: + if (opcode == OP(SEND_MIDDLE) || + opcode == OP(SEND_LAST) || + opcode == OP(SEND_LAST_WITH_IMMEDIATE) || + opcode == OP(RDMA_WRITE_MIDDLE) || + opcode == OP(RDMA_WRITE_LAST) || + opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) + goto nack_inv; + /* + * Note that it is up to the requester to not send a new + * RDMA read or atomic operation before receiving an ACK + * for the previous operation. + */ + break; + } + + if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) { + qp->r_flags |= QIB_R_COMM_EST; + if (qp->ibqp.event_handler) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_COMM_EST; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + } + + /* OK, process the packet. */ + switch (opcode) { + case OP(SEND_FIRST): + ret = qib_get_rwqe(qp, 0); + if (ret < 0) + goto nack_op_err; + if (!ret) + goto rnr_nak; + qp->r_rcv_len = 0; + /* FALLTHROUGH */ + case OP(SEND_MIDDLE): + case OP(RDMA_WRITE_MIDDLE): +send_middle: + /* Check for invalid length PMTU or posted rwqe len. */ + if (unlikely(tlen != (hdrsize + pmtu + 4))) + goto nack_inv; + qp->r_rcv_len += pmtu; + if (unlikely(qp->r_rcv_len > qp->r_len)) + goto nack_inv; + qib_copy_sge(&qp->r_sge, data, pmtu, 1); + break; + + case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): + /* consume RWQE */ + ret = qib_get_rwqe(qp, 1); + if (ret < 0) + goto nack_op_err; + if (!ret) + goto rnr_nak; + goto send_last_imm; + + case OP(SEND_ONLY): + case OP(SEND_ONLY_WITH_IMMEDIATE): + ret = qib_get_rwqe(qp, 0); + if (ret < 0) + goto nack_op_err; + if (!ret) + goto rnr_nak; + qp->r_rcv_len = 0; + if (opcode == OP(SEND_ONLY)) + goto no_immediate_data; + /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */ + case OP(SEND_LAST_WITH_IMMEDIATE): +send_last_imm: + wc.ex.imm_data = ohdr->u.imm_data; + hdrsize += 4; + wc.wc_flags = IB_WC_WITH_IMM; + goto send_last; + case OP(SEND_LAST): + case OP(RDMA_WRITE_LAST): +no_immediate_data: + wc.wc_flags = 0; + wc.ex.imm_data = 0; +send_last: + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* Check for invalid length. */ + /* XXX LAST len should be >= 1 */ + if (unlikely(tlen < (hdrsize + pad + 4))) + goto nack_inv; + /* Don't count the CRC. */ + tlen -= (hdrsize + pad + 4); + wc.byte_len = tlen + qp->r_rcv_len; + if (unlikely(wc.byte_len > qp->r_len)) + goto nack_inv; + qib_copy_sge(&qp->r_sge, data, tlen, 1); + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } + qp->r_msn++; + if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + break; + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; + if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || + opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + else + wc.opcode = IB_WC_RECV; + wc.qp = &qp->ibqp; + wc.src_qp = qp->remote_qpn; + wc.slid = qp->remote_ah_attr.dlid; + wc.sl = qp->remote_ah_attr.sl; + /* zero fields that are N/A */ + wc.vendor_err = 0; + wc.pkey_index = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; + /* Signal completion event if the solicited bit is set. */ + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + (ohdr->bth[0] & + cpu_to_be32(IB_BTH_SOLICITED)) != 0); + break; + + case OP(RDMA_WRITE_FIRST): + case OP(RDMA_WRITE_ONLY): + case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) + goto nack_inv; + /* consume RWQE */ + reth = &ohdr->u.rc.reth; + hdrsize += sizeof(*reth); + qp->r_len = be32_to_cpu(reth->length); + qp->r_rcv_len = 0; + qp->r_sge.sg_list = NULL; + if (qp->r_len != 0) { + u32 rkey = be32_to_cpu(reth->rkey); + u64 vaddr = be64_to_cpu(reth->vaddr); + int ok; + + /* Check rkey & NAK */ + ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr, + rkey, IB_ACCESS_REMOTE_WRITE); + if (unlikely(!ok)) + goto nack_acc; + qp->r_sge.num_sge = 1; + } else { + qp->r_sge.num_sge = 0; + qp->r_sge.sge.mr = NULL; + qp->r_sge.sge.vaddr = NULL; + qp->r_sge.sge.length = 0; + qp->r_sge.sge.sge_length = 0; + } + if (opcode == OP(RDMA_WRITE_FIRST)) + goto send_middle; + else if (opcode == OP(RDMA_WRITE_ONLY)) + goto no_immediate_data; + ret = qib_get_rwqe(qp, 1); + if (ret < 0) + goto nack_op_err; + if (!ret) + goto rnr_nak; + wc.ex.imm_data = ohdr->u.rc.imm_data; + hdrsize += 4; + wc.wc_flags = IB_WC_WITH_IMM; + goto send_last; + + case OP(RDMA_READ_REQUEST): { + struct qib_ack_entry *e; + u32 len; + u8 next; + + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) + goto nack_inv; + next = qp->r_head_ack_queue + 1; + /* s_ack_queue is size QIB_MAX_RDMA_ATOMIC+1 so use > not >= */ + if (next > QIB_MAX_RDMA_ATOMIC) + next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + if (unlikely(next == qp->s_tail_ack_queue)) { + if (!qp->s_ack_queue[next].sent) + goto nack_inv_unlck; + qib_update_ack_queue(qp, next); + } + e = &qp->s_ack_queue[qp->r_head_ack_queue]; + if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { + atomic_dec(&e->rdma_sge.mr->refcount); + e->rdma_sge.mr = NULL; + } + reth = &ohdr->u.rc.reth; + len = be32_to_cpu(reth->length); + if (len) { + u32 rkey = be32_to_cpu(reth->rkey); + u64 vaddr = be64_to_cpu(reth->vaddr); + int ok; + + /* Check rkey & NAK */ + ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr, + rkey, IB_ACCESS_REMOTE_READ); + if (unlikely(!ok)) + goto nack_acc_unlck; + /* + * Update the next expected PSN. We add 1 later + * below, so only add the remainder here. + */ + if (len > pmtu) + qp->r_psn += (len - 1) / pmtu; + } else { + e->rdma_sge.mr = NULL; + e->rdma_sge.vaddr = NULL; + e->rdma_sge.length = 0; + e->rdma_sge.sge_length = 0; + } + e->opcode = opcode; + e->sent = 0; + e->psn = psn; + e->lpsn = qp->r_psn; + /* + * We need to increment the MSN here instead of when we + * finish sending the result since a duplicate request would + * increment it more than once. + */ + qp->r_msn++; + qp->r_psn++; + qp->r_state = opcode; + qp->r_nak_state = 0; + qp->r_head_ack_queue = next; + + /* Schedule the send tasklet. */ + qp->s_flags |= QIB_S_RESP_PENDING; + qib_schedule_send(qp); + + goto sunlock; + } + + case OP(COMPARE_SWAP): + case OP(FETCH_ADD): { + struct ib_atomic_eth *ateth; + struct qib_ack_entry *e; + u64 vaddr; + atomic64_t *maddr; + u64 sdata; + u32 rkey; + u8 next; + + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) + goto nack_inv; + next = qp->r_head_ack_queue + 1; + if (next > QIB_MAX_RDMA_ATOMIC) + next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + if (unlikely(next == qp->s_tail_ack_queue)) { + if (!qp->s_ack_queue[next].sent) + goto nack_inv_unlck; + qib_update_ack_queue(qp, next); + } + e = &qp->s_ack_queue[qp->r_head_ack_queue]; + if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { + atomic_dec(&e->rdma_sge.mr->refcount); + e->rdma_sge.mr = NULL; + } + ateth = &ohdr->u.atomic_eth; + vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | + be32_to_cpu(ateth->vaddr[1]); + if (unlikely(vaddr & (sizeof(u64) - 1))) + goto nack_inv_unlck; + rkey = be32_to_cpu(ateth->rkey); + /* Check rkey & NAK */ + if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), + vaddr, rkey, + IB_ACCESS_REMOTE_ATOMIC))) + goto nack_acc_unlck; + /* Perform atomic OP and save result. */ + maddr = (atomic64_t *) qp->r_sge.sge.vaddr; + sdata = be64_to_cpu(ateth->swap_data); + e->atomic_data = (opcode == OP(FETCH_ADD)) ? + (u64) atomic64_add_return(sdata, maddr) - sdata : + (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + be64_to_cpu(ateth->compare_data), + sdata); + atomic_dec(&qp->r_sge.sge.mr->refcount); + qp->r_sge.num_sge = 0; + e->opcode = opcode; + e->sent = 0; + e->psn = psn; + e->lpsn = psn; + qp->r_msn++; + qp->r_psn++; + qp->r_state = opcode; + qp->r_nak_state = 0; + qp->r_head_ack_queue = next; + + /* Schedule the send tasklet. */ + qp->s_flags |= QIB_S_RESP_PENDING; + qib_schedule_send(qp); + + goto sunlock; + } + + default: + /* NAK unknown opcodes. */ + goto nack_inv; + } + qp->r_psn++; + qp->r_state = opcode; + qp->r_ack_psn = psn; + qp->r_nak_state = 0; + /* Send an ACK if requested or required. */ + if (psn & (1 << 31)) + goto send_ack; + return; + +rnr_nak: + qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; + qp->r_ack_psn = qp->r_psn; + /* Queue RNR NAK for later */ + if (list_empty(&qp->rspwait)) { + qp->r_flags |= QIB_R_RSP_NAK; + atomic_inc(&qp->refcount); + list_add_tail(&qp->rspwait, &rcd->qp_wait_list); + } + return; + +nack_op_err: + qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); + qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR; + qp->r_ack_psn = qp->r_psn; + /* Queue NAK for later */ + if (list_empty(&qp->rspwait)) { + qp->r_flags |= QIB_R_RSP_NAK; + atomic_inc(&qp->refcount); + list_add_tail(&qp->rspwait, &rcd->qp_wait_list); + } + return; + +nack_inv_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); +nack_inv: + qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); + qp->r_nak_state = IB_NAK_INVALID_REQUEST; + qp->r_ack_psn = qp->r_psn; + /* Queue NAK for later */ + if (list_empty(&qp->rspwait)) { + qp->r_flags |= QIB_R_RSP_NAK; + atomic_inc(&qp->refcount); + list_add_tail(&qp->rspwait, &rcd->qp_wait_list); + } + return; + +nack_acc_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); +nack_acc: + qib_rc_error(qp, IB_WC_LOC_PROT_ERR); + qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; + qp->r_ack_psn = qp->r_psn; +send_ack: + qib_send_rc_ack(qp); + return; + +sunlock: + spin_unlock_irqrestore(&qp->s_lock, flags); +} diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c new file mode 100644 index 00000000..b4b37e47 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -0,0 +1,822 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "qib.h" +#include "qib_mad.h" + +/* + * Convert the AETH RNR timeout code into the number of microseconds. + */ +const u32 ib_qib_rnr_table[32] = { + 655360, /* 00: 655.36 */ + 10, /* 01: .01 */ + 20, /* 02 .02 */ + 30, /* 03: .03 */ + 40, /* 04: .04 */ + 60, /* 05: .06 */ + 80, /* 06: .08 */ + 120, /* 07: .12 */ + 160, /* 08: .16 */ + 240, /* 09: .24 */ + 320, /* 0A: .32 */ + 480, /* 0B: .48 */ + 640, /* 0C: .64 */ + 960, /* 0D: .96 */ + 1280, /* 0E: 1.28 */ + 1920, /* 0F: 1.92 */ + 2560, /* 10: 2.56 */ + 3840, /* 11: 3.84 */ + 5120, /* 12: 5.12 */ + 7680, /* 13: 7.68 */ + 10240, /* 14: 10.24 */ + 15360, /* 15: 15.36 */ + 20480, /* 16: 20.48 */ + 30720, /* 17: 30.72 */ + 40960, /* 18: 40.96 */ + 61440, /* 19: 61.44 */ + 81920, /* 1A: 81.92 */ + 122880, /* 1B: 122.88 */ + 163840, /* 1C: 163.84 */ + 245760, /* 1D: 245.76 */ + 327680, /* 1E: 327.68 */ + 491520 /* 1F: 491.52 */ +}; + +/* + * Validate a RWQE and fill in the SGE state. + * Return 1 if OK. + */ +static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe) +{ + int i, j, ret; + struct ib_wc wc; + struct qib_lkey_table *rkt; + struct qib_pd *pd; + struct qib_sge_state *ss; + + rkt = &to_idev(qp->ibqp.device)->lk_table; + pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); + ss = &qp->r_sge; + ss->sg_list = qp->r_sg_list; + qp->r_len = 0; + for (i = j = 0; i < wqe->num_sge; i++) { + if (wqe->sg_list[i].length == 0) + continue; + /* Check LKEY */ + if (!qib_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, + &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + goto bad_lkey; + qp->r_len += wqe->sg_list[i].length; + j++; + } + ss->num_sge = j; + ss->total_len = qp->r_len; + ret = 1; + goto bail; + +bad_lkey: + while (j) { + struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; + + atomic_dec(&sge->mr->refcount); + } + ss->num_sge = 0; + memset(&wc, 0, sizeof(wc)); + wc.wr_id = wqe->wr_id; + wc.status = IB_WC_LOC_PROT_ERR; + wc.opcode = IB_WC_RECV; + wc.qp = &qp->ibqp; + /* Signal solicited completion event. */ + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + ret = 0; +bail: + return ret; +} + +/** + * qib_get_rwqe - copy the next RWQE into the QP's RWQE + * @qp: the QP + * @wr_id_only: update qp->r_wr_id only, not qp->r_sge + * + * Return -1 if there is a local error, 0 if no RWQE is available, + * otherwise return 1. + * + * Can be called from interrupt level. + */ +int qib_get_rwqe(struct qib_qp *qp, int wr_id_only) +{ + unsigned long flags; + struct qib_rq *rq; + struct qib_rwq *wq; + struct qib_srq *srq; + struct qib_rwqe *wqe; + void (*handler)(struct ib_event *, void *); + u32 tail; + int ret; + + if (qp->ibqp.srq) { + srq = to_isrq(qp->ibqp.srq); + handler = srq->ibsrq.event_handler; + rq = &srq->rq; + } else { + srq = NULL; + handler = NULL; + rq = &qp->r_rq; + } + + spin_lock_irqsave(&rq->lock, flags); + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { + ret = 0; + goto unlock; + } + + wq = rq->wq; + tail = wq->tail; + /* Validate tail before using it since it is user writable. */ + if (tail >= rq->size) + tail = 0; + if (unlikely(tail == wq->head)) { + ret = 0; + goto unlock; + } + /* Make sure entry is read after head index is read. */ + smp_rmb(); + wqe = get_rwqe_ptr(rq, tail); + /* + * Even though we update the tail index in memory, the verbs + * consumer is not supposed to post more entries until a + * completion is generated. + */ + if (++tail >= rq->size) + tail = 0; + wq->tail = tail; + if (!wr_id_only && !qib_init_sge(qp, wqe)) { + ret = -1; + goto unlock; + } + qp->r_wr_id = wqe->wr_id; + + ret = 1; + set_bit(QIB_R_WRID_VALID, &qp->r_aflags); + if (handler) { + u32 n; + + /* + * Validate head pointer value and compute + * the number of remaining WQEs. + */ + n = wq->head; + if (n >= rq->size) + n = 0; + if (n < tail) + n += rq->size - tail; + else + n -= tail; + if (n < srq->limit) { + struct ib_event ev; + + srq->limit = 0; + spin_unlock_irqrestore(&rq->lock, flags); + ev.device = qp->ibqp.device; + ev.element.srq = qp->ibqp.srq; + ev.event = IB_EVENT_SRQ_LIMIT_REACHED; + handler(&ev, srq->ibsrq.srq_context); + goto bail; + } + } +unlock: + spin_unlock_irqrestore(&rq->lock, flags); +bail: + return ret; +} + +/* + * Switch to alternate path. + * The QP s_lock should be held and interrupts disabled. + */ +void qib_migrate_qp(struct qib_qp *qp) +{ + struct ib_event ev; + + qp->s_mig_state = IB_MIG_MIGRATED; + qp->remote_ah_attr = qp->alt_ah_attr; + qp->port_num = qp->alt_ah_attr.port_num; + qp->s_pkey_index = qp->s_alt_pkey_index; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_PATH_MIG; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); +} + +static __be64 get_sguid(struct qib_ibport *ibp, unsigned index) +{ + if (!index) { + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + return ppd->guid; + } else + return ibp->guids[index - 1]; +} + +static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) +{ + return (gid->global.interface_id == id && + (gid->global.subnet_prefix == gid_prefix || + gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX)); +} + +/* + * + * This should be called with the QP r_lock held. + * + * The s_lock will be acquired around the qib_migrate_qp() call. + */ +int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, + int has_grh, struct qib_qp *qp, u32 bth0) +{ + __be64 guid; + unsigned long flags; + + if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) { + if (!has_grh) { + if (qp->alt_ah_attr.ah_flags & IB_AH_GRH) + goto err; + } else { + if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH)) + goto err; + guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index); + if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid)) + goto err; + if (!gid_ok(&hdr->u.l.grh.sgid, + qp->alt_ah_attr.grh.dgid.global.subnet_prefix, + qp->alt_ah_attr.grh.dgid.global.interface_id)) + goto err; + } + if (!qib_pkey_ok((u16)bth0, + qib_get_pkey(ibp, qp->s_alt_pkey_index))) { + qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, + (u16)bth0, + (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + 0, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); + goto err; + } + /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ + if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid || + ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num) + goto err; + spin_lock_irqsave(&qp->s_lock, flags); + qib_migrate_qp(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + } else { + if (!has_grh) { + if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + goto err; + } else { + if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) + goto err; + guid = get_sguid(ibp, + qp->remote_ah_attr.grh.sgid_index); + if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid)) + goto err; + if (!gid_ok(&hdr->u.l.grh.sgid, + qp->remote_ah_attr.grh.dgid.global.subnet_prefix, + qp->remote_ah_attr.grh.dgid.global.interface_id)) + goto err; + } + if (!qib_pkey_ok((u16)bth0, + qib_get_pkey(ibp, qp->s_pkey_index))) { + qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, + (u16)bth0, + (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + 0, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); + goto err; + } + /* Validate the SLID. See Ch. 9.6.1.5 */ + if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid || + ppd_from_ibp(ibp)->port != qp->port_num) + goto err; + if (qp->s_mig_state == IB_MIG_REARM && + !(bth0 & IB_BTH_MIG_REQ)) + qp->s_mig_state = IB_MIG_ARMED; + } + + return 0; + +err: + return 1; +} + +/** + * qib_ruc_loopback - handle UC and RC lookback requests + * @sqp: the sending QP + * + * This is called from qib_do_send() to + * forward a WQE addressed to the same HCA. + * Note that although we are single threaded due to the tasklet, we still + * have to protect against post_send(). We don't have to worry about + * receive interrupts since this is a connected protocol and all packets + * will pass through here. + */ +static void qib_ruc_loopback(struct qib_qp *sqp) +{ + struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); + struct qib_qp *qp; + struct qib_swqe *wqe; + struct qib_sge *sge; + unsigned long flags; + struct ib_wc wc; + u64 sdata; + atomic64_t *maddr; + enum ib_wc_status send_status; + int release; + int ret; + + /* + * Note that we check the responder QP state after + * checking the requester's state. + */ + qp = qib_lookup_qpn(ibp, sqp->remote_qpn); + + spin_lock_irqsave(&sqp->s_lock, flags); + + /* Return if we are already busy processing a work request. */ + if ((sqp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT)) || + !(ib_qib_state_ops[sqp->state] & QIB_PROCESS_OR_FLUSH_SEND)) + goto unlock; + + sqp->s_flags |= QIB_S_BUSY; + +again: + if (sqp->s_last == sqp->s_head) + goto clr_busy; + wqe = get_swqe_ptr(sqp, sqp->s_last); + + /* Return if it is not OK to start a new work reqeust. */ + if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_NEXT_SEND_OK)) { + if (!(ib_qib_state_ops[sqp->state] & QIB_FLUSH_SEND)) + goto clr_busy; + /* We are in the error state, flush the work request. */ + send_status = IB_WC_WR_FLUSH_ERR; + goto flush_send; + } + + /* + * We can rely on the entry not changing without the s_lock + * being held until we update s_last. + * We increment s_cur to indicate s_last is in progress. + */ + if (sqp->s_last == sqp->s_cur) { + if (++sqp->s_cur >= sqp->s_size) + sqp->s_cur = 0; + } + spin_unlock_irqrestore(&sqp->s_lock, flags); + + if (!qp || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) || + qp->ibqp.qp_type != sqp->ibqp.qp_type) { + ibp->n_pkt_drops++; + /* + * For RC, the requester would timeout and retry so + * shortcut the timeouts and just signal too many retries. + */ + if (sqp->ibqp.qp_type == IB_QPT_RC) + send_status = IB_WC_RETRY_EXC_ERR; + else + send_status = IB_WC_SUCCESS; + goto serr; + } + + memset(&wc, 0, sizeof wc); + send_status = IB_WC_SUCCESS; + + release = 1; + sqp->s_sge.sge = wqe->sg_list[0]; + sqp->s_sge.sg_list = wqe->sg_list + 1; + sqp->s_sge.num_sge = wqe->wr.num_sge; + sqp->s_len = wqe->length; + switch (wqe->wr.opcode) { + case IB_WR_SEND_WITH_IMM: + wc.wc_flags = IB_WC_WITH_IMM; + wc.ex.imm_data = wqe->wr.ex.imm_data; + /* FALLTHROUGH */ + case IB_WR_SEND: + ret = qib_get_rwqe(qp, 0); + if (ret < 0) + goto op_err; + if (!ret) + goto rnr_nak; + break; + + case IB_WR_RDMA_WRITE_WITH_IMM: + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) + goto inv_err; + wc.wc_flags = IB_WC_WITH_IMM; + wc.ex.imm_data = wqe->wr.ex.imm_data; + ret = qib_get_rwqe(qp, 1); + if (ret < 0) + goto op_err; + if (!ret) + goto rnr_nak; + /* FALLTHROUGH */ + case IB_WR_RDMA_WRITE: + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) + goto inv_err; + if (wqe->length == 0) + break; + if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length, + wqe->wr.wr.rdma.remote_addr, + wqe->wr.wr.rdma.rkey, + IB_ACCESS_REMOTE_WRITE))) + goto acc_err; + qp->r_sge.sg_list = NULL; + qp->r_sge.num_sge = 1; + qp->r_sge.total_len = wqe->length; + break; + + case IB_WR_RDMA_READ: + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) + goto inv_err; + if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, + wqe->wr.wr.rdma.remote_addr, + wqe->wr.wr.rdma.rkey, + IB_ACCESS_REMOTE_READ))) + goto acc_err; + release = 0; + sqp->s_sge.sg_list = NULL; + sqp->s_sge.num_sge = 1; + qp->r_sge.sge = wqe->sg_list[0]; + qp->r_sge.sg_list = wqe->sg_list + 1; + qp->r_sge.num_sge = wqe->wr.num_sge; + qp->r_sge.total_len = wqe->length; + break; + + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) + goto inv_err; + if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), + wqe->wr.wr.atomic.remote_addr, + wqe->wr.wr.atomic.rkey, + IB_ACCESS_REMOTE_ATOMIC))) + goto acc_err; + /* Perform atomic OP and save result. */ + maddr = (atomic64_t *) qp->r_sge.sge.vaddr; + sdata = wqe->wr.wr.atomic.compare_add; + *(u64 *) sqp->s_sge.sge.vaddr = + (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? + (u64) atomic64_add_return(sdata, maddr) - sdata : + (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + sdata, wqe->wr.wr.atomic.swap); + atomic_dec(&qp->r_sge.sge.mr->refcount); + qp->r_sge.num_sge = 0; + goto send_comp; + + default: + send_status = IB_WC_LOC_QP_OP_ERR; + goto serr; + } + + sge = &sqp->s_sge.sge; + while (sqp->s_len) { + u32 len = sqp->s_len; + + if (len > sge->length) + len = sge->length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + qib_copy_sge(&qp->r_sge, sge->vaddr, len, release); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (!release) + atomic_dec(&sge->mr->refcount); + if (--sqp->s_sge.num_sge) + *sge = *sqp->s_sge.sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { + if (++sge->n >= QIB_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + sqp->s_len -= len; + } + if (release) + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } + + if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + goto send_comp; + + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + else + wc.opcode = IB_WC_RECV; + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; + wc.byte_len = wqe->length; + wc.qp = &qp->ibqp; + wc.src_qp = qp->remote_qpn; + wc.slid = qp->remote_ah_attr.dlid; + wc.sl = qp->remote_ah_attr.sl; + wc.port_num = 1; + /* Signal completion event if the solicited bit is set. */ + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + wqe->wr.send_flags & IB_SEND_SOLICITED); + +send_comp: + spin_lock_irqsave(&sqp->s_lock, flags); + ibp->n_loop_pkts++; +flush_send: + sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; + qib_send_complete(sqp, wqe, send_status); + goto again; + +rnr_nak: + /* Handle RNR NAK */ + if (qp->ibqp.qp_type == IB_QPT_UC) + goto send_comp; + ibp->n_rnr_naks++; + /* + * Note: we don't need the s_lock held since the BUSY flag + * makes this single threaded. + */ + if (sqp->s_rnr_retry == 0) { + send_status = IB_WC_RNR_RETRY_EXC_ERR; + goto serr; + } + if (sqp->s_rnr_retry_cnt < 7) + sqp->s_rnr_retry--; + spin_lock_irqsave(&sqp->s_lock, flags); + if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_RECV_OK)) + goto clr_busy; + sqp->s_flags |= QIB_S_WAIT_RNR; + sqp->s_timer.function = qib_rc_rnr_retry; + sqp->s_timer.expires = jiffies + + usecs_to_jiffies(ib_qib_rnr_table[qp->r_min_rnr_timer]); + add_timer(&sqp->s_timer); + goto clr_busy; + +op_err: + send_status = IB_WC_REM_OP_ERR; + wc.status = IB_WC_LOC_QP_OP_ERR; + goto err; + +inv_err: + send_status = IB_WC_REM_INV_REQ_ERR; + wc.status = IB_WC_LOC_QP_OP_ERR; + goto err; + +acc_err: + send_status = IB_WC_REM_ACCESS_ERR; + wc.status = IB_WC_LOC_PROT_ERR; +err: + /* responder goes to error state */ + qib_rc_error(qp, wc.status); + +serr: + spin_lock_irqsave(&sqp->s_lock, flags); + qib_send_complete(sqp, wqe, send_status); + if (sqp->ibqp.qp_type == IB_QPT_RC) { + int lastwqe = qib_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + + sqp->s_flags &= ~QIB_S_BUSY; + spin_unlock_irqrestore(&sqp->s_lock, flags); + if (lastwqe) { + struct ib_event ev; + + ev.device = sqp->ibqp.device; + ev.element.qp = &sqp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context); + } + goto done; + } +clr_busy: + sqp->s_flags &= ~QIB_S_BUSY; +unlock: + spin_unlock_irqrestore(&sqp->s_lock, flags); +done: + if (qp && atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); +} + +/** + * qib_make_grh - construct a GRH header + * @ibp: a pointer to the IB port + * @hdr: a pointer to the GRH header being constructed + * @grh: the global route address to send to + * @hwords: the number of 32 bit words of header being sent + * @nwords: the number of 32 bit words of data being sent + * + * Return the size of the header in 32 bit words. + */ +u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, + struct ib_global_route *grh, u32 hwords, u32 nwords) +{ + hdr->version_tclass_flow = + cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) | + (grh->traffic_class << IB_GRH_TCLASS_SHIFT) | + (grh->flow_label << IB_GRH_FLOW_SHIFT)); + hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2); + /* next_hdr is defined by C8-7 in ch. 8.4.1 */ + hdr->next_hdr = IB_GRH_NEXT_HDR; + hdr->hop_limit = grh->hop_limit; + /* The SGID is 32-bit aligned. */ + hdr->sgid.global.subnet_prefix = ibp->gid_prefix; + hdr->sgid.global.interface_id = grh->sgid_index ? + ibp->guids[grh->sgid_index - 1] : ppd_from_ibp(ibp)->guid; + hdr->dgid = grh->dgid; + + /* GRH header size in 32-bit words. */ + return sizeof(struct ib_grh) / sizeof(u32); +} + +void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, + u32 bth0, u32 bth2) +{ + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + u16 lrh0; + u32 nwords; + u32 extra_bytes; + + /* Construct the header. */ + extra_bytes = -qp->s_cur_size & 3; + nwords = (qp->s_cur_size + extra_bytes) >> 2; + lrh0 = QIB_LRH_BTH; + if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { + qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh, + &qp->remote_ah_attr.grh, + qp->s_hdrwords, nwords); + lrh0 = QIB_LRH_GRH; + } + lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 | + qp->remote_ah_attr.sl << 4; + qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); + qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + qp->s_hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | + qp->remote_ah_attr.src_path_bits); + bth0 |= qib_get_pkey(ibp, qp->s_pkey_index); + bth0 |= extra_bytes << 20; + if (qp->s_mig_state == IB_MIG_MIGRATED) + bth0 |= IB_BTH_MIG_REQ; + ohdr->bth[0] = cpu_to_be32(bth0); + ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); + ohdr->bth[2] = cpu_to_be32(bth2); +} + +/** + * qib_do_send - perform a send on a QP + * @work: contains a pointer to the QP + * + * Process entries in the send work queue until credit or queue is + * exhausted. Only allow one CPU to send a packet per QP (tasklet). + * Otherwise, two threads could send packets out of order. + */ +void qib_do_send(struct work_struct *work) +{ + struct qib_qp *qp = container_of(work, struct qib_qp, s_work); + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + int (*make_req)(struct qib_qp *qp); + unsigned long flags; + + if ((qp->ibqp.qp_type == IB_QPT_RC || + qp->ibqp.qp_type == IB_QPT_UC) && + (qp->remote_ah_attr.dlid & ~((1 << ppd->lmc) - 1)) == ppd->lid) { + qib_ruc_loopback(qp); + return; + } + + if (qp->ibqp.qp_type == IB_QPT_RC) + make_req = qib_make_rc_req; + else if (qp->ibqp.qp_type == IB_QPT_UC) + make_req = qib_make_uc_req; + else + make_req = qib_make_ud_req; + + spin_lock_irqsave(&qp->s_lock, flags); + + /* Return if we are already busy processing a work request. */ + if (!qib_send_ok(qp)) { + spin_unlock_irqrestore(&qp->s_lock, flags); + return; + } + + qp->s_flags |= QIB_S_BUSY; + + spin_unlock_irqrestore(&qp->s_lock, flags); + + do { + /* Check for a constructed packet to be sent. */ + if (qp->s_hdrwords != 0) { + /* + * If the packet cannot be sent now, return and + * the send tasklet will be woken up later. + */ + if (qib_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, + qp->s_cur_sge, qp->s_cur_size)) + break; + /* Record that s_hdr is empty. */ + qp->s_hdrwords = 0; + } + } while (make_req(qp)); +} + +/* + * This should be called with s_lock held. + */ +void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, + enum ib_wc_status status) +{ + u32 old_last, last; + unsigned i; + + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND)) + return; + + for (i = 0; i < wqe->wr.num_sge; i++) { + struct qib_sge *sge = &wqe->sg_list[i]; + + atomic_dec(&sge->mr->refcount); + } + if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI) + atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); + + /* See ch. 11.2.4.1 and 10.7.3.1 */ + if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED) || + status != IB_WC_SUCCESS) { + struct ib_wc wc; + + memset(&wc, 0, sizeof wc); + wc.wr_id = wqe->wr.wr_id; + wc.status = status; + wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; + wc.qp = &qp->ibqp; + if (status == IB_WC_SUCCESS) + wc.byte_len = wqe->length; + qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, + status != IB_WC_SUCCESS); + } + + last = qp->s_last; + old_last = last; + if (++last >= qp->s_size) + last = 0; + qp->s_last = last; + if (qp->s_acked == old_last) + qp->s_acked = last; + if (qp->s_cur == old_last) + qp->s_cur = last; + if (qp->s_tail == old_last) + qp->s_tail = last; + if (qp->state == IB_QPS_SQD && last == qp->s_cur) + qp->s_draining = 0; +} diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c new file mode 100644 index 00000000..ac065dd6 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -0,0 +1,1448 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +/* + * This file contains all of the code that is specific to the SerDes + * on the QLogic_IB 7220 chip. + */ + +#include +#include +#include +#include + +#include "qib.h" +#include "qib_7220.h" + +#define SD7220_FW_NAME "qlogic/sd7220.fw" +MODULE_FIRMWARE(SD7220_FW_NAME); + +/* + * Same as in qib_iba7220.c, but just the registers needed here. + * Could move whole set to qib_7220.h, but decided better to keep + * local. + */ +#define KREG_IDX(regname) (QIB_7220_##regname##_OFFS / sizeof(u64)) +#define kr_hwerrclear KREG_IDX(HwErrClear) +#define kr_hwerrmask KREG_IDX(HwErrMask) +#define kr_hwerrstatus KREG_IDX(HwErrStatus) +#define kr_ibcstatus KREG_IDX(IBCStatus) +#define kr_ibserdesctrl KREG_IDX(IBSerDesCtrl) +#define kr_scratch KREG_IDX(Scratch) +#define kr_xgxs_cfg KREG_IDX(XGXSCfg) +/* these are used only here, not in qib_iba7220.c */ +#define kr_ibsd_epb_access_ctrl KREG_IDX(ibsd_epb_access_ctrl) +#define kr_ibsd_epb_transaction_reg KREG_IDX(ibsd_epb_transaction_reg) +#define kr_pciesd_epb_transaction_reg KREG_IDX(pciesd_epb_transaction_reg) +#define kr_pciesd_epb_access_ctrl KREG_IDX(pciesd_epb_access_ctrl) +#define kr_serdes_ddsrxeq0 KREG_IDX(SerDes_DDSRXEQ0) + +/* + * The IBSerDesMappTable is a memory that holds values to be stored in + * various SerDes registers by IBC. + */ +#define kr_serdes_maptable KREG_IDX(IBSerDesMappTable) + +/* + * Below used for sdnum parameter, selecting one of the two sections + * used for PCIe, or the single SerDes used for IB. + */ +#define PCIE_SERDES0 0 +#define PCIE_SERDES1 1 + +/* + * The EPB requires addressing in a particular form. EPB_LOC() is intended + * to make #definitions a little more readable. + */ +#define EPB_ADDR_SHF 8 +#define EPB_LOC(chn, elt, reg) \ + (((elt & 0xf) | ((chn & 7) << 4) | ((reg & 0x3f) << 9)) << \ + EPB_ADDR_SHF) +#define EPB_IB_QUAD0_CS_SHF (25) +#define EPB_IB_QUAD0_CS (1U << EPB_IB_QUAD0_CS_SHF) +#define EPB_IB_UC_CS_SHF (26) +#define EPB_PCIE_UC_CS_SHF (27) +#define EPB_GLOBAL_WR (1U << (EPB_ADDR_SHF + 8)) + +/* Forward declarations. */ +static int qib_sd7220_reg_mod(struct qib_devdata *dd, int sdnum, u32 loc, + u32 data, u32 mask); +static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, + int mask); +static int qib_sd_trimdone_poll(struct qib_devdata *dd); +static void qib_sd_trimdone_monitor(struct qib_devdata *dd, const char *where); +static int qib_sd_setvals(struct qib_devdata *dd); +static int qib_sd_early(struct qib_devdata *dd); +static int qib_sd_dactrim(struct qib_devdata *dd); +static int qib_internal_presets(struct qib_devdata *dd); +/* Tweak the register (CMUCTRL5) that contains the TRIMSELF controls */ +static int qib_sd_trimself(struct qib_devdata *dd, int val); +static int epb_access(struct qib_devdata *dd, int sdnum, int claim); +static int qib_sd7220_ib_load(struct qib_devdata *dd, + const struct firmware *fw); +static int qib_sd7220_ib_vfy(struct qib_devdata *dd, + const struct firmware *fw); + +/* + * Below keeps track of whether the "once per power-on" initialization has + * been done, because uC code Version 1.32.17 or higher allows the uC to + * be reset at will, and Automatic Equalization may require it. So the + * state of the reset "pin", is no longer valid. Instead, we check for the + * actual uC code having been loaded. + */ +static int qib_ibsd_ucode_loaded(struct qib_pportdata *ppd, + const struct firmware *fw) +{ + struct qib_devdata *dd = ppd->dd; + + if (!dd->cspec->serdes_first_init_done && + qib_sd7220_ib_vfy(dd, fw) > 0) + dd->cspec->serdes_first_init_done = 1; + return dd->cspec->serdes_first_init_done; +} + +/* repeat #define for local use. "Real" #define is in qib_iba7220.c */ +#define QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR 0x0000004000000000ULL +#define IB_MPREG5 (EPB_LOC(6, 0, 0xE) | (1L << EPB_IB_UC_CS_SHF)) +#define IB_MPREG6 (EPB_LOC(6, 0, 0xF) | (1U << EPB_IB_UC_CS_SHF)) +#define UC_PAR_CLR_D 8 +#define UC_PAR_CLR_M 0xC +#define IB_CTRL2(chn) (EPB_LOC(chn, 7, 3) | EPB_IB_QUAD0_CS) +#define START_EQ1(chan) EPB_LOC(chan, 7, 0x27) + +void qib_sd7220_clr_ibpar(struct qib_devdata *dd) +{ + int ret; + + /* clear, then re-enable parity errs */ + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, + UC_PAR_CLR_D, UC_PAR_CLR_M); + if (ret < 0) { + qib_dev_err(dd, "Failed clearing IBSerDes Parity err\n"); + goto bail; + } + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0, + UC_PAR_CLR_M); + + qib_read_kreg32(dd, kr_scratch); + udelay(4); + qib_write_kreg(dd, kr_hwerrclear, + QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR); + qib_read_kreg32(dd, kr_scratch); +bail: + return; +} + +/* + * After a reset or other unusual event, the epb interface may need + * to be re-synchronized, between the host and the uC. + * returns <0 for failure to resync within IBSD_RESYNC_TRIES (not expected) + */ +#define IBSD_RESYNC_TRIES 3 +#define IB_PGUDP(chn) (EPB_LOC((chn), 2, 1) | EPB_IB_QUAD0_CS) +#define IB_CMUDONE(chn) (EPB_LOC((chn), 7, 0xF) | EPB_IB_QUAD0_CS) + +static int qib_resync_ibepb(struct qib_devdata *dd) +{ + int ret, pat, tries, chn; + u32 loc; + + ret = -1; + chn = 0; + for (tries = 0; tries < (4 * IBSD_RESYNC_TRIES); ++tries) { + loc = IB_PGUDP(chn); + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0); + if (ret < 0) { + qib_dev_err(dd, "Failed read in resync\n"); + continue; + } + if (ret != 0xF0 && ret != 0x55 && tries == 0) + qib_dev_err(dd, "unexpected pattern in resync\n"); + pat = ret ^ 0xA5; /* alternate F0 and 55 */ + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, pat, 0xFF); + if (ret < 0) { + qib_dev_err(dd, "Failed write in resync\n"); + continue; + } + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0); + if (ret < 0) { + qib_dev_err(dd, "Failed re-read in resync\n"); + continue; + } + if (ret != pat) { + qib_dev_err(dd, "Failed compare1 in resync\n"); + continue; + } + loc = IB_CMUDONE(chn); + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0); + if (ret < 0) { + qib_dev_err(dd, "Failed CMUDONE rd in resync\n"); + continue; + } + if ((ret & 0x70) != ((chn << 4) | 0x40)) { + qib_dev_err(dd, "Bad CMUDONE value %02X, chn %d\n", + ret, chn); + continue; + } + if (++chn == 4) + break; /* Success */ + } + return (ret > 0) ? 0 : ret; +} + +/* + * Localize the stuff that should be done to change IB uC reset + * returns <0 for errors. + */ +static int qib_ibsd_reset(struct qib_devdata *dd, int assert_rst) +{ + u64 rst_val; + int ret = 0; + unsigned long flags; + + rst_val = qib_read_kreg64(dd, kr_ibserdesctrl); + if (assert_rst) { + /* + * Vendor recommends "interrupting" uC before reset, to + * minimize possible glitches. + */ + spin_lock_irqsave(&dd->cspec->sdepb_lock, flags); + epb_access(dd, IB_7220_SERDES, 1); + rst_val |= 1ULL; + /* Squelch possible parity error from _asserting_ reset */ + qib_write_kreg(dd, kr_hwerrmask, + dd->cspec->hwerrmask & + ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR); + qib_write_kreg(dd, kr_ibserdesctrl, rst_val); + /* flush write, delay to ensure it took effect */ + qib_read_kreg32(dd, kr_scratch); + udelay(2); + /* once it's reset, can remove interrupt */ + epb_access(dd, IB_7220_SERDES, -1); + spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags); + } else { + /* + * Before we de-assert reset, we need to deal with + * possible glitch on the Parity-error line. + * Suppress it around the reset, both in chip-level + * hwerrmask and in IB uC control reg. uC will allow + * it again during startup. + */ + u64 val; + rst_val &= ~(1ULL); + qib_write_kreg(dd, kr_hwerrmask, + dd->cspec->hwerrmask & + ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR); + + ret = qib_resync_ibepb(dd); + if (ret < 0) + qib_dev_err(dd, "unable to re-sync IB EPB\n"); + + /* set uC control regs to suppress parity errs */ + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG5, 1, 1); + if (ret < 0) + goto bail; + /* IB uC code past Version 1.32.17 allow suppression of wdog */ + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80, + 0x80); + if (ret < 0) { + qib_dev_err(dd, "Failed to set WDOG disable\n"); + goto bail; + } + qib_write_kreg(dd, kr_ibserdesctrl, rst_val); + /* flush write, delay for startup */ + qib_read_kreg32(dd, kr_scratch); + udelay(1); + /* clear, then re-enable parity errs */ + qib_sd7220_clr_ibpar(dd); + val = qib_read_kreg64(dd, kr_hwerrstatus); + if (val & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR) { + qib_dev_err(dd, "IBUC Parity still set after RST\n"); + dd->cspec->hwerrmask &= + ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR; + } + qib_write_kreg(dd, kr_hwerrmask, + dd->cspec->hwerrmask); + } + +bail: + return ret; +} + +static void qib_sd_trimdone_monitor(struct qib_devdata *dd, + const char *where) +{ + int ret, chn, baduns; + u64 val; + + if (!where) + where = "?"; + + /* give time for reset to settle out in EPB */ + udelay(2); + + ret = qib_resync_ibepb(dd); + if (ret < 0) + qib_dev_err(dd, "not able to re-sync IB EPB (%s)\n", where); + + /* Do "sacrificial read" to get EPB in sane state after reset */ + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(0), 0, 0); + if (ret < 0) + qib_dev_err(dd, "Failed TRIMDONE 1st read, (%s)\n", where); + + /* Check/show "summary" Trim-done bit in IBCStatus */ + val = qib_read_kreg64(dd, kr_ibcstatus); + if (!(val & (1ULL << 11))) + qib_dev_err(dd, "IBCS TRIMDONE clear (%s)\n", where); + /* + * Do "dummy read/mod/wr" to get EPB in sane state after reset + * The default value for MPREG6 is 0. + */ + udelay(2); + + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80, 0x80); + if (ret < 0) + qib_dev_err(dd, "Failed Dummy RMW, (%s)\n", where); + udelay(10); + + baduns = 0; + + for (chn = 3; chn >= 0; --chn) { + /* Read CTRL reg for each channel to check TRIMDONE */ + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, + IB_CTRL2(chn), 0, 0); + if (ret < 0) + qib_dev_err(dd, "Failed checking TRIMDONE, chn %d" + " (%s)\n", chn, where); + + if (!(ret & 0x10)) { + int probe; + + baduns |= (1 << chn); + qib_dev_err(dd, "TRIMDONE cleared on chn %d (%02X)." + " (%s)\n", chn, ret, where); + probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES, + IB_PGUDP(0), 0, 0); + qib_dev_err(dd, "probe is %d (%02X)\n", + probe, probe); + probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES, + IB_CTRL2(chn), 0, 0); + qib_dev_err(dd, "re-read: %d (%02X)\n", + probe, probe); + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, + IB_CTRL2(chn), 0x10, 0x10); + if (ret < 0) + qib_dev_err(dd, + "Err on TRIMDONE rewrite1\n"); + } + } + for (chn = 3; chn >= 0; --chn) { + /* Read CTRL reg for each channel to check TRIMDONE */ + if (baduns & (1 << chn)) { + qib_dev_err(dd, + "Reseting TRIMDONE on chn %d (%s)\n", + chn, where); + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, + IB_CTRL2(chn), 0x10, 0x10); + if (ret < 0) + qib_dev_err(dd, "Failed re-setting " + "TRIMDONE, chn %d (%s)\n", + chn, where); + } + } +} + +/* + * Below is portion of IBA7220-specific bringup_serdes() that actually + * deals with registers and memory within the SerDes itself. + * Post IB uC code version 1.32.17, was_reset being 1 is not really + * informative, so we double-check. + */ +int qib_sd7220_init(struct qib_devdata *dd) +{ + const struct firmware *fw; + int ret = 1; /* default to failure */ + int first_reset, was_reset; + + /* SERDES MPU reset recorded in D0 */ + was_reset = (qib_read_kreg64(dd, kr_ibserdesctrl) & 1); + if (!was_reset) { + /* entered with reset not asserted, we need to do it */ + qib_ibsd_reset(dd, 1); + qib_sd_trimdone_monitor(dd, "Driver-reload"); + } + + ret = request_firmware(&fw, SD7220_FW_NAME, &dd->pcidev->dev); + if (ret) { + qib_dev_err(dd, "Failed to load IB SERDES image\n"); + goto done; + } + + /* Substitute our deduced value for was_reset */ + ret = qib_ibsd_ucode_loaded(dd->pport, fw); + if (ret < 0) + goto bail; + + first_reset = !ret; /* First reset if IBSD uCode not yet loaded */ + /* + * Alter some regs per vendor latest doc, reset-defaults + * are not right for IB. + */ + ret = qib_sd_early(dd); + if (ret < 0) { + qib_dev_err(dd, "Failed to set IB SERDES early defaults\n"); + goto bail; + } + /* + * Set DAC manual trim IB. + * We only do this once after chip has been reset (usually + * same as once per system boot). + */ + if (first_reset) { + ret = qib_sd_dactrim(dd); + if (ret < 0) { + qib_dev_err(dd, "Failed IB SERDES DAC trim\n"); + goto bail; + } + } + /* + * Set various registers (DDS and RXEQ) that will be + * controlled by IBC (in 1.2 mode) to reasonable preset values + * Calling the "internal" version avoids the "check for needed" + * and "trimdone monitor" that might be counter-productive. + */ + ret = qib_internal_presets(dd); + if (ret < 0) { + qib_dev_err(dd, "Failed to set IB SERDES presets\n"); + goto bail; + } + ret = qib_sd_trimself(dd, 0x80); + if (ret < 0) { + qib_dev_err(dd, "Failed to set IB SERDES TRIMSELF\n"); + goto bail; + } + + /* Load image, then try to verify */ + ret = 0; /* Assume success */ + if (first_reset) { + int vfy; + int trim_done; + + ret = qib_sd7220_ib_load(dd, fw); + if (ret < 0) { + qib_dev_err(dd, "Failed to load IB SERDES image\n"); + goto bail; + } else { + /* Loaded image, try to verify */ + vfy = qib_sd7220_ib_vfy(dd, fw); + if (vfy != ret) { + qib_dev_err(dd, "SERDES PRAM VFY failed\n"); + goto bail; + } /* end if verified */ + } /* end if loaded */ + + /* + * Loaded and verified. Almost good... + * hold "success" in ret + */ + ret = 0; + /* + * Prev steps all worked, continue bringup + * De-assert RESET to uC, only in first reset, to allow + * trimming. + * + * Since our default setup sets START_EQ1 to + * PRESET, we need to clear that for this very first run. + */ + ret = ibsd_mod_allchnls(dd, START_EQ1(0), 0, 0x38); + if (ret < 0) { + qib_dev_err(dd, "Failed clearing START_EQ1\n"); + goto bail; + } + + qib_ibsd_reset(dd, 0); + /* + * If this is not the first reset, trimdone should be set + * already. We may need to check about this. + */ + trim_done = qib_sd_trimdone_poll(dd); + /* + * Whether or not trimdone succeeded, we need to put the + * uC back into reset to avoid a possible fight with the + * IBC state-machine. + */ + qib_ibsd_reset(dd, 1); + + if (!trim_done) { + qib_dev_err(dd, "No TRIMDONE seen\n"); + goto bail; + } + /* + * DEBUG: check each time we reset if trimdone bits have + * gotten cleared, and re-set them. + */ + qib_sd_trimdone_monitor(dd, "First-reset"); + /* Remember so we do not re-do the load, dactrim, etc. */ + dd->cspec->serdes_first_init_done = 1; + } + /* + * setup for channel training and load values for + * RxEq and DDS in tables used by IBC in IB1.2 mode + */ + ret = 0; + if (qib_sd_setvals(dd) >= 0) + goto done; +bail: + ret = 1; +done: + /* start relock timer regardless, but start at 1 second */ + set_7220_relock_poll(dd, -1); + + release_firmware(fw); + return ret; +} + +#define EPB_ACC_REQ 1 +#define EPB_ACC_GNT 0x100 +#define EPB_DATA_MASK 0xFF +#define EPB_RD (1ULL << 24) +#define EPB_TRANS_RDY (1ULL << 31) +#define EPB_TRANS_ERR (1ULL << 30) +#define EPB_TRANS_TRIES 5 + +/* + * query, claim, release ownership of the EPB (External Parallel Bus) + * for a specified SERDES. + * the "claim" parameter is >0 to claim, <0 to release, 0 to query. + * Returns <0 for errors, >0 if we had ownership, else 0. + */ +static int epb_access(struct qib_devdata *dd, int sdnum, int claim) +{ + u16 acc; + u64 accval; + int owned = 0; + u64 oct_sel = 0; + + switch (sdnum) { + case IB_7220_SERDES: + /* + * The IB SERDES "ownership" is fairly simple. A single each + * request/grant. + */ + acc = kr_ibsd_epb_access_ctrl; + break; + + case PCIE_SERDES0: + case PCIE_SERDES1: + /* PCIe SERDES has two "octants", need to select which */ + acc = kr_pciesd_epb_access_ctrl; + oct_sel = (2 << (sdnum - PCIE_SERDES0)); + break; + + default: + return 0; + } + + /* Make sure any outstanding transaction was seen */ + qib_read_kreg32(dd, kr_scratch); + udelay(15); + + accval = qib_read_kreg32(dd, acc); + + owned = !!(accval & EPB_ACC_GNT); + if (claim < 0) { + /* Need to release */ + u64 pollval; + /* + * The only writeable bits are the request and CS. + * Both should be clear + */ + u64 newval = 0; + qib_write_kreg(dd, acc, newval); + /* First read after write is not trustworthy */ + pollval = qib_read_kreg32(dd, acc); + udelay(5); + pollval = qib_read_kreg32(dd, acc); + if (pollval & EPB_ACC_GNT) + owned = -1; + } else if (claim > 0) { + /* Need to claim */ + u64 pollval; + u64 newval = EPB_ACC_REQ | oct_sel; + qib_write_kreg(dd, acc, newval); + /* First read after write is not trustworthy */ + pollval = qib_read_kreg32(dd, acc); + udelay(5); + pollval = qib_read_kreg32(dd, acc); + if (!(pollval & EPB_ACC_GNT)) + owned = -1; + } + return owned; +} + +/* + * Lemma to deal with race condition of write..read to epb regs + */ +static int epb_trans(struct qib_devdata *dd, u16 reg, u64 i_val, u64 *o_vp) +{ + int tries; + u64 transval; + + qib_write_kreg(dd, reg, i_val); + /* Throw away first read, as RDY bit may be stale */ + transval = qib_read_kreg64(dd, reg); + + for (tries = EPB_TRANS_TRIES; tries; --tries) { + transval = qib_read_kreg32(dd, reg); + if (transval & EPB_TRANS_RDY) + break; + udelay(5); + } + if (transval & EPB_TRANS_ERR) + return -1; + if (tries > 0 && o_vp) + *o_vp = transval; + return tries; +} + +/** + * qib_sd7220_reg_mod - modify SERDES register + * @dd: the qlogic_ib device + * @sdnum: which SERDES to access + * @loc: location - channel, element, register, as packed by EPB_LOC() macro. + * @wd: Write Data - value to set in register + * @mask: ones where data should be spliced into reg. + * + * Basic register read/modify/write, with un-needed acesses elided. That is, + * a mask of zero will prevent write, while a mask of 0xFF will prevent read. + * returns current (presumed, if a write was done) contents of selected + * register, or <0 if errors. + */ +static int qib_sd7220_reg_mod(struct qib_devdata *dd, int sdnum, u32 loc, + u32 wd, u32 mask) +{ + u16 trans; + u64 transval; + int owned; + int tries, ret; + unsigned long flags; + + switch (sdnum) { + case IB_7220_SERDES: + trans = kr_ibsd_epb_transaction_reg; + break; + + case PCIE_SERDES0: + case PCIE_SERDES1: + trans = kr_pciesd_epb_transaction_reg; + break; + + default: + return -1; + } + + /* + * All access is locked in software (vs other host threads) and + * hardware (vs uC access). + */ + spin_lock_irqsave(&dd->cspec->sdepb_lock, flags); + + owned = epb_access(dd, sdnum, 1); + if (owned < 0) { + spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags); + return -1; + } + ret = 0; + for (tries = EPB_TRANS_TRIES; tries; --tries) { + transval = qib_read_kreg32(dd, trans); + if (transval & EPB_TRANS_RDY) + break; + udelay(5); + } + + if (tries > 0) { + tries = 1; /* to make read-skip work */ + if (mask != 0xFF) { + /* + * Not a pure write, so need to read. + * loc encodes chip-select as well as address + */ + transval = loc | EPB_RD; + tries = epb_trans(dd, trans, transval, &transval); + } + if (tries > 0 && mask != 0) { + /* + * Not a pure read, so need to write. + */ + wd = (wd & mask) | (transval & ~mask); + transval = loc | (wd & EPB_DATA_MASK); + tries = epb_trans(dd, trans, transval, &transval); + } + } + /* else, failed to see ready, what error-handling? */ + + /* + * Release bus. Failure is an error. + */ + if (epb_access(dd, sdnum, -1) < 0) + ret = -1; + else + ret = transval & EPB_DATA_MASK; + + spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags); + if (tries <= 0) + ret = -1; + return ret; +} + +#define EPB_ROM_R (2) +#define EPB_ROM_W (1) +/* + * Below, all uC-related, use appropriate UC_CS, depending + * on which SerDes is used. + */ +#define EPB_UC_CTL EPB_LOC(6, 0, 0) +#define EPB_MADDRL EPB_LOC(6, 0, 2) +#define EPB_MADDRH EPB_LOC(6, 0, 3) +#define EPB_ROMDATA EPB_LOC(6, 0, 4) +#define EPB_RAMDATA EPB_LOC(6, 0, 5) + +/* Transfer date to/from uC Program RAM of IB or PCIe SerDes */ +static int qib_sd7220_ram_xfer(struct qib_devdata *dd, int sdnum, u32 loc, + u8 *buf, int cnt, int rd_notwr) +{ + u16 trans; + u64 transval; + u64 csbit; + int owned; + int tries; + int sofar; + int addr; + int ret; + unsigned long flags; + const char *op; + + /* Pick appropriate transaction reg and "Chip select" for this serdes */ + switch (sdnum) { + case IB_7220_SERDES: + csbit = 1ULL << EPB_IB_UC_CS_SHF; + trans = kr_ibsd_epb_transaction_reg; + break; + + case PCIE_SERDES0: + case PCIE_SERDES1: + /* PCIe SERDES has uC "chip select" in different bit, too */ + csbit = 1ULL << EPB_PCIE_UC_CS_SHF; + trans = kr_pciesd_epb_transaction_reg; + break; + + default: + return -1; + } + + op = rd_notwr ? "Rd" : "Wr"; + spin_lock_irqsave(&dd->cspec->sdepb_lock, flags); + + owned = epb_access(dd, sdnum, 1); + if (owned < 0) { + spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags); + return -1; + } + + /* + * In future code, we may need to distinguish several address ranges, + * and select various memories based on this. For now, just trim + * "loc" (location including address and memory select) to + * "addr" (address within memory). we will only support PRAM + * The memory is 8KB. + */ + addr = loc & 0x1FFF; + for (tries = EPB_TRANS_TRIES; tries; --tries) { + transval = qib_read_kreg32(dd, trans); + if (transval & EPB_TRANS_RDY) + break; + udelay(5); + } + + sofar = 0; + if (tries > 0) { + /* + * Every "memory" access is doubly-indirect. + * We set two bytes of address, then read/write + * one or mores bytes of data. + */ + + /* First, we set control to "Read" or "Write" */ + transval = csbit | EPB_UC_CTL | + (rd_notwr ? EPB_ROM_R : EPB_ROM_W); + tries = epb_trans(dd, trans, transval, &transval); + while (tries > 0 && sofar < cnt) { + if (!sofar) { + /* Only set address at start of chunk */ + int addrbyte = (addr + sofar) >> 8; + transval = csbit | EPB_MADDRH | addrbyte; + tries = epb_trans(dd, trans, transval, + &transval); + if (tries <= 0) + break; + addrbyte = (addr + sofar) & 0xFF; + transval = csbit | EPB_MADDRL | addrbyte; + tries = epb_trans(dd, trans, transval, + &transval); + if (tries <= 0) + break; + } + + if (rd_notwr) + transval = csbit | EPB_ROMDATA | EPB_RD; + else + transval = csbit | EPB_ROMDATA | buf[sofar]; + tries = epb_trans(dd, trans, transval, &transval); + if (tries <= 0) + break; + if (rd_notwr) + buf[sofar] = transval & EPB_DATA_MASK; + ++sofar; + } + /* Finally, clear control-bit for Read or Write */ + transval = csbit | EPB_UC_CTL; + tries = epb_trans(dd, trans, transval, &transval); + } + + ret = sofar; + /* Release bus. Failure is an error */ + if (epb_access(dd, sdnum, -1) < 0) + ret = -1; + + spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags); + if (tries <= 0) + ret = -1; + return ret; +} + +#define PROG_CHUNK 64 + +static int qib_sd7220_prog_ld(struct qib_devdata *dd, int sdnum, + const u8 *img, int len, int offset) +{ + int cnt, sofar, req; + + sofar = 0; + while (sofar < len) { + req = len - sofar; + if (req > PROG_CHUNK) + req = PROG_CHUNK; + cnt = qib_sd7220_ram_xfer(dd, sdnum, offset + sofar, + (u8 *)img + sofar, req, 0); + if (cnt < req) { + sofar = -1; + break; + } + sofar += req; + } + return sofar; +} + +#define VFY_CHUNK 64 +#define SD_PRAM_ERROR_LIMIT 42 + +static int qib_sd7220_prog_vfy(struct qib_devdata *dd, int sdnum, + const u8 *img, int len, int offset) +{ + int cnt, sofar, req, idx, errors; + unsigned char readback[VFY_CHUNK]; + + errors = 0; + sofar = 0; + while (sofar < len) { + req = len - sofar; + if (req > VFY_CHUNK) + req = VFY_CHUNK; + cnt = qib_sd7220_ram_xfer(dd, sdnum, sofar + offset, + readback, req, 1); + if (cnt < req) { + /* failed in read itself */ + sofar = -1; + break; + } + for (idx = 0; idx < cnt; ++idx) { + if (readback[idx] != img[idx+sofar]) + ++errors; + } + sofar += cnt; + } + return errors ? -errors : sofar; +} + +static int +qib_sd7220_ib_load(struct qib_devdata *dd, const struct firmware *fw) +{ + return qib_sd7220_prog_ld(dd, IB_7220_SERDES, fw->data, fw->size, 0); +} + +static int +qib_sd7220_ib_vfy(struct qib_devdata *dd, const struct firmware *fw) +{ + return qib_sd7220_prog_vfy(dd, IB_7220_SERDES, fw->data, fw->size, 0); +} + +/* + * IRQ not set up at this point in init, so we poll. + */ +#define IB_SERDES_TRIM_DONE (1ULL << 11) +#define TRIM_TMO (30) + +static int qib_sd_trimdone_poll(struct qib_devdata *dd) +{ + int trim_tmo, ret; + uint64_t val; + + /* + * Default to failure, so IBC will not start + * without IB_SERDES_TRIM_DONE. + */ + ret = 0; + for (trim_tmo = 0; trim_tmo < TRIM_TMO; ++trim_tmo) { + val = qib_read_kreg64(dd, kr_ibcstatus); + if (val & IB_SERDES_TRIM_DONE) { + ret = 1; + break; + } + msleep(10); + } + if (trim_tmo >= TRIM_TMO) { + qib_dev_err(dd, "No TRIMDONE in %d tries\n", trim_tmo); + ret = 0; + } + return ret; +} + +#define TX_FAST_ELT (9) + +/* + * Set the "negotiation" values for SERDES. These are used by the IB1.2 + * link negotiation. Macros below are attempt to keep the values a + * little more human-editable. + * First, values related to Drive De-emphasis Settings. + */ + +#define NUM_DDS_REGS 6 +#define DDS_REG_MAP 0x76A910 /* LSB-first list of regs (in elt 9) to mod */ + +#define DDS_VAL(amp_d, main_d, ipst_d, ipre_d, amp_s, main_s, ipst_s, ipre_s) \ + { { ((amp_d & 0x1F) << 1) | 1, ((amp_s & 0x1F) << 1) | 1, \ + (main_d << 3) | 4 | (ipre_d >> 2), \ + (main_s << 3) | 4 | (ipre_s >> 2), \ + ((ipst_d & 0xF) << 1) | ((ipre_d & 3) << 6) | 0x21, \ + ((ipst_s & 0xF) << 1) | ((ipre_s & 3) << 6) | 0x21 } } + +static struct dds_init { + uint8_t reg_vals[NUM_DDS_REGS]; +} dds_init_vals[] = { + /* DDR(FDR) SDR(HDR) */ + /* Vendor recommends below for 3m cable */ +#define DDS_3M 0 + DDS_VAL(31, 19, 12, 0, 29, 22, 9, 0), + DDS_VAL(31, 12, 15, 4, 31, 15, 15, 1), + DDS_VAL(31, 13, 15, 3, 31, 16, 15, 0), + DDS_VAL(31, 14, 15, 2, 31, 17, 14, 0), + DDS_VAL(31, 15, 15, 1, 31, 18, 13, 0), + DDS_VAL(31, 16, 15, 0, 31, 19, 12, 0), + DDS_VAL(31, 17, 14, 0, 31, 20, 11, 0), + DDS_VAL(31, 18, 13, 0, 30, 21, 10, 0), + DDS_VAL(31, 20, 11, 0, 28, 23, 8, 0), + DDS_VAL(31, 21, 10, 0, 27, 24, 7, 0), + DDS_VAL(31, 22, 9, 0, 26, 25, 6, 0), + DDS_VAL(30, 23, 8, 0, 25, 26, 5, 0), + DDS_VAL(29, 24, 7, 0, 23, 27, 4, 0), + /* Vendor recommends below for 1m cable */ +#define DDS_1M 13 + DDS_VAL(28, 25, 6, 0, 21, 28, 3, 0), + DDS_VAL(27, 26, 5, 0, 19, 29, 2, 0), + DDS_VAL(25, 27, 4, 0, 17, 30, 1, 0) +}; + +/* + * Now the RXEQ section of the table. + */ +/* Hardware packs an element number and register address thus: */ +#define RXEQ_INIT_RDESC(elt, addr) (((elt) & 0xF) | ((addr) << 4)) +#define RXEQ_VAL(elt, adr, val0, val1, val2, val3) \ + {RXEQ_INIT_RDESC((elt), (adr)), {(val0), (val1), (val2), (val3)} } + +#define RXEQ_VAL_ALL(elt, adr, val) \ + {RXEQ_INIT_RDESC((elt), (adr)), {(val), (val), (val), (val)} } + +#define RXEQ_SDR_DFELTH 0 +#define RXEQ_SDR_TLTH 0 +#define RXEQ_SDR_G1CNT_Z1CNT 0x11 +#define RXEQ_SDR_ZCNT 23 + +static struct rxeq_init { + u16 rdesc; /* in form used in SerDesDDSRXEQ */ + u8 rdata[4]; +} rxeq_init_vals[] = { + /* Set Rcv Eq. to Preset node */ + RXEQ_VAL_ALL(7, 0x27, 0x10), + /* Set DFELTHFDR/HDR thresholds */ + RXEQ_VAL(7, 8, 0, 0, 0, 0), /* FDR, was 0, 1, 2, 3 */ + RXEQ_VAL(7, 0x21, 0, 0, 0, 0), /* HDR */ + /* Set TLTHFDR/HDR theshold */ + RXEQ_VAL(7, 9, 2, 2, 2, 2), /* FDR, was 0, 2, 4, 6 */ + RXEQ_VAL(7, 0x23, 2, 2, 2, 2), /* HDR, was 0, 1, 2, 3 */ + /* Set Preamp setting 2 (ZFR/ZCNT) */ + RXEQ_VAL(7, 0x1B, 12, 12, 12, 12), /* FDR, was 12, 16, 20, 24 */ + RXEQ_VAL(7, 0x1C, 12, 12, 12, 12), /* HDR, was 12, 16, 20, 24 */ + /* Set Preamp DC gain and Setting 1 (GFR/GHR) */ + RXEQ_VAL(7, 0x1E, 16, 16, 16, 16), /* FDR, was 16, 17, 18, 20 */ + RXEQ_VAL(7, 0x1F, 16, 16, 16, 16), /* HDR, was 16, 17, 18, 20 */ + /* Toggle RELOCK (in VCDL_CTRL0) to lock to data */ + RXEQ_VAL_ALL(6, 6, 0x20), /* Set D5 High */ + RXEQ_VAL_ALL(6, 6, 0), /* Set D5 Low */ +}; + +/* There are 17 values from vendor, but IBC only accesses the first 16 */ +#define DDS_ROWS (16) +#define RXEQ_ROWS ARRAY_SIZE(rxeq_init_vals) + +static int qib_sd_setvals(struct qib_devdata *dd) +{ + int idx, midx; + int min_idx; /* Minimum index for this portion of table */ + uint32_t dds_reg_map; + u64 __iomem *taddr, *iaddr; + uint64_t data; + uint64_t sdctl; + + taddr = dd->kregbase + kr_serdes_maptable; + iaddr = dd->kregbase + kr_serdes_ddsrxeq0; + + /* + * Init the DDS section of the table. + * Each "row" of the table provokes NUM_DDS_REG writes, to the + * registers indicated in DDS_REG_MAP. + */ + sdctl = qib_read_kreg64(dd, kr_ibserdesctrl); + sdctl = (sdctl & ~(0x1f << 8)) | (NUM_DDS_REGS << 8); + sdctl = (sdctl & ~(0x1f << 13)) | (RXEQ_ROWS << 13); + qib_write_kreg(dd, kr_ibserdesctrl, sdctl); + + /* + * Iterate down table within loop for each register to store. + */ + dds_reg_map = DDS_REG_MAP; + for (idx = 0; idx < NUM_DDS_REGS; ++idx) { + data = ((dds_reg_map & 0xF) << 4) | TX_FAST_ELT; + writeq(data, iaddr + idx); + mmiowb(); + qib_read_kreg32(dd, kr_scratch); + dds_reg_map >>= 4; + for (midx = 0; midx < DDS_ROWS; ++midx) { + u64 __iomem *daddr = taddr + ((midx << 4) + idx); + data = dds_init_vals[midx].reg_vals[idx]; + writeq(data, daddr); + mmiowb(); + qib_read_kreg32(dd, kr_scratch); + } /* End inner for (vals for this reg, each row) */ + } /* end outer for (regs to be stored) */ + + /* + * Init the RXEQ section of the table. + * This runs in a different order, as the pattern of + * register references is more complex, but there are only + * four "data" values per register. + */ + min_idx = idx; /* RXEQ indices pick up where DDS left off */ + taddr += 0x100; /* RXEQ data is in second half of table */ + /* Iterate through RXEQ register addresses */ + for (idx = 0; idx < RXEQ_ROWS; ++idx) { + int didx; /* "destination" */ + int vidx; + + /* didx is offset by min_idx to address RXEQ range of regs */ + didx = idx + min_idx; + /* Store the next RXEQ register address */ + writeq(rxeq_init_vals[idx].rdesc, iaddr + didx); + mmiowb(); + qib_read_kreg32(dd, kr_scratch); + /* Iterate through RXEQ values */ + for (vidx = 0; vidx < 4; vidx++) { + data = rxeq_init_vals[idx].rdata[vidx]; + writeq(data, taddr + (vidx << 6) + idx); + mmiowb(); + qib_read_kreg32(dd, kr_scratch); + } + } /* end outer for (Reg-writes for RXEQ) */ + return 0; +} + +#define CMUCTRL5 EPB_LOC(7, 0, 0x15) +#define RXHSCTRL0(chan) EPB_LOC(chan, 6, 0) +#define VCDL_DAC2(chan) EPB_LOC(chan, 6, 5) +#define VCDL_CTRL0(chan) EPB_LOC(chan, 6, 6) +#define VCDL_CTRL2(chan) EPB_LOC(chan, 6, 8) +#define START_EQ2(chan) EPB_LOC(chan, 7, 0x28) + +/* + * Repeat a "store" across all channels of the IB SerDes. + * Although nominally it inherits the "read value" of the last + * channel it modified, the only really useful return is <0 for + * failure, >= 0 for success. The parameter 'loc' is assumed to + * be the location in some channel of the register to be modified + * The caller can specify use of the "gang write" option of EPB, + * in which case we use the specified channel data for any fields + * not explicitely written. + */ +static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, + int mask) +{ + int ret = -1; + int chnl; + + if (loc & EPB_GLOBAL_WR) { + /* + * Our caller has assured us that we can set all four + * channels at once. Trust that. If mask is not 0xFF, + * we will read the _specified_ channel for our starting + * value. + */ + loc |= (1U << EPB_IB_QUAD0_CS_SHF); + chnl = (loc >> (4 + EPB_ADDR_SHF)) & 7; + if (mask != 0xFF) { + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, + loc & ~EPB_GLOBAL_WR, 0, 0); + if (ret < 0) { + int sloc = loc >> EPB_ADDR_SHF; + + qib_dev_err(dd, "pre-read failed: elt %d," + " addr 0x%X, chnl %d\n", + (sloc & 0xF), + (sloc >> 9) & 0x3f, chnl); + return ret; + } + val = (ret & ~mask) | (val & mask); + } + loc &= ~(7 << (4+EPB_ADDR_SHF)); + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, 0xFF); + if (ret < 0) { + int sloc = loc >> EPB_ADDR_SHF; + + qib_dev_err(dd, "Global WR failed: elt %d," + " addr 0x%X, val %02X\n", + (sloc & 0xF), (sloc >> 9) & 0x3f, val); + } + return ret; + } + /* Clear "channel" and set CS so we can simply iterate */ + loc &= ~(7 << (4+EPB_ADDR_SHF)); + loc |= (1U << EPB_IB_QUAD0_CS_SHF); + for (chnl = 0; chnl < 4; ++chnl) { + int cloc = loc | (chnl << (4+EPB_ADDR_SHF)); + + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, cloc, val, mask); + if (ret < 0) { + int sloc = loc >> EPB_ADDR_SHF; + + qib_dev_err(dd, "Write failed: elt %d," + " addr 0x%X, chnl %d, val 0x%02X," + " mask 0x%02X\n", + (sloc & 0xF), (sloc >> 9) & 0x3f, chnl, + val & 0xFF, mask & 0xFF); + break; + } + } + return ret; +} + +/* + * Set the Tx values normally modified by IBC in IB1.2 mode to default + * values, as gotten from first row of init table. + */ +static int set_dds_vals(struct qib_devdata *dd, struct dds_init *ddi) +{ + int ret; + int idx, reg, data; + uint32_t regmap; + + regmap = DDS_REG_MAP; + for (idx = 0; idx < NUM_DDS_REGS; ++idx) { + reg = (regmap & 0xF); + regmap >>= 4; + data = ddi->reg_vals[idx]; + /* Vendor says RMW not needed for these regs, use 0xFF mask */ + ret = ibsd_mod_allchnls(dd, EPB_LOC(0, 9, reg), data, 0xFF); + if (ret < 0) + break; + } + return ret; +} + +/* + * Set the Rx values normally modified by IBC in IB1.2 mode to default + * values, as gotten from selected column of init table. + */ +static int set_rxeq_vals(struct qib_devdata *dd, int vsel) +{ + int ret; + int ridx; + int cnt = ARRAY_SIZE(rxeq_init_vals); + + for (ridx = 0; ridx < cnt; ++ridx) { + int elt, reg, val, loc; + + elt = rxeq_init_vals[ridx].rdesc & 0xF; + reg = rxeq_init_vals[ridx].rdesc >> 4; + loc = EPB_LOC(0, elt, reg); + val = rxeq_init_vals[ridx].rdata[vsel]; + /* mask of 0xFF, because hardware does full-byte store. */ + ret = ibsd_mod_allchnls(dd, loc, val, 0xFF); + if (ret < 0) + break; + } + return ret; +} + +/* + * Set the default values (row 0) for DDR Driver Demphasis. + * we do this initially and whenever we turn off IB-1.2 + * + * The "default" values for Rx equalization are also stored to + * SerDes registers. Formerly (and still default), we used set 2. + * For experimenting with cables and link-partners, we allow changing + * that via a module parameter. + */ +static unsigned qib_rxeq_set = 2; +module_param_named(rxeq_default_set, qib_rxeq_set, uint, + S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(rxeq_default_set, + "Which set [0..3] of Rx Equalization values is default"); + +static int qib_internal_presets(struct qib_devdata *dd) +{ + int ret = 0; + + ret = set_dds_vals(dd, dds_init_vals + DDS_3M); + + if (ret < 0) + qib_dev_err(dd, "Failed to set default DDS values\n"); + ret = set_rxeq_vals(dd, qib_rxeq_set & 3); + if (ret < 0) + qib_dev_err(dd, "Failed to set default RXEQ values\n"); + return ret; +} + +int qib_sd7220_presets(struct qib_devdata *dd) +{ + int ret = 0; + + if (!dd->cspec->presets_needed) + return ret; + dd->cspec->presets_needed = 0; + /* Assert uC reset, so we don't clash with it. */ + qib_ibsd_reset(dd, 1); + udelay(2); + qib_sd_trimdone_monitor(dd, "link-down"); + + ret = qib_internal_presets(dd); + return ret; +} + +static int qib_sd_trimself(struct qib_devdata *dd, int val) +{ + int loc = CMUCTRL5 | (1U << EPB_IB_QUAD0_CS_SHF); + + return qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, 0xFF); +} + +static int qib_sd_early(struct qib_devdata *dd) +{ + int ret; + + ret = ibsd_mod_allchnls(dd, RXHSCTRL0(0) | EPB_GLOBAL_WR, 0xD4, 0xFF); + if (ret < 0) + goto bail; + ret = ibsd_mod_allchnls(dd, START_EQ1(0) | EPB_GLOBAL_WR, 0x10, 0xFF); + if (ret < 0) + goto bail; + ret = ibsd_mod_allchnls(dd, START_EQ2(0) | EPB_GLOBAL_WR, 0x30, 0xFF); +bail: + return ret; +} + +#define BACTRL(chnl) EPB_LOC(chnl, 6, 0x0E) +#define LDOUTCTRL1(chnl) EPB_LOC(chnl, 7, 6) +#define RXHSSTATUS(chnl) EPB_LOC(chnl, 6, 0xF) + +static int qib_sd_dactrim(struct qib_devdata *dd) +{ + int ret; + + ret = ibsd_mod_allchnls(dd, VCDL_DAC2(0) | EPB_GLOBAL_WR, 0x2D, 0xFF); + if (ret < 0) + goto bail; + + /* more fine-tuning of what will be default */ + ret = ibsd_mod_allchnls(dd, VCDL_CTRL2(0), 3, 0xF); + if (ret < 0) + goto bail; + + ret = ibsd_mod_allchnls(dd, BACTRL(0) | EPB_GLOBAL_WR, 0x40, 0xFF); + if (ret < 0) + goto bail; + + ret = ibsd_mod_allchnls(dd, LDOUTCTRL1(0) | EPB_GLOBAL_WR, 0x04, 0xFF); + if (ret < 0) + goto bail; + + ret = ibsd_mod_allchnls(dd, RXHSSTATUS(0) | EPB_GLOBAL_WR, 0x04, 0xFF); + if (ret < 0) + goto bail; + + /* + * Delay for max possible number of steps, with slop. + * Each step is about 4usec. + */ + udelay(415); + + ret = ibsd_mod_allchnls(dd, LDOUTCTRL1(0) | EPB_GLOBAL_WR, 0x00, 0xFF); + +bail: + return ret; +} + +#define RELOCK_FIRST_MS 3 +#define RXLSPPM(chan) EPB_LOC(chan, 0, 2) +void toggle_7220_rclkrls(struct qib_devdata *dd) +{ + int loc = RXLSPPM(0) | EPB_GLOBAL_WR; + int ret; + + ret = ibsd_mod_allchnls(dd, loc, 0, 0x80); + if (ret < 0) + qib_dev_err(dd, "RCLKRLS failed to clear D7\n"); + else { + udelay(1); + ibsd_mod_allchnls(dd, loc, 0x80, 0x80); + } + /* And again for good measure */ + udelay(1); + ret = ibsd_mod_allchnls(dd, loc, 0, 0x80); + if (ret < 0) + qib_dev_err(dd, "RCLKRLS failed to clear D7\n"); + else { + udelay(1); + ibsd_mod_allchnls(dd, loc, 0x80, 0x80); + } + /* Now reset xgxs and IBC to complete the recovery */ + dd->f_xgxs_reset(dd->pport); +} + +/* + * Shut down the timer that polls for relock occasions, if needed + * this is "hooked" from qib_7220_quiet_serdes(), which is called + * just before qib_shutdown_device() in qib_driver.c shuts down all + * the other timers + */ +void shutdown_7220_relock_poll(struct qib_devdata *dd) +{ + if (dd->cspec->relock_timer_active) + del_timer_sync(&dd->cspec->relock_timer); +} + +static unsigned qib_relock_by_timer = 1; +module_param_named(relock_by_timer, qib_relock_by_timer, uint, + S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(relock_by_timer, "Allow relock attempt if link not up"); + +static void qib_run_relock(unsigned long opaque) +{ + struct qib_devdata *dd = (struct qib_devdata *)opaque; + struct qib_pportdata *ppd = dd->pport; + struct qib_chip_specific *cs = dd->cspec; + int timeoff; + + /* + * Check link-training state for "stuck" state, when down. + * if found, try relock and schedule another try at + * exponentially growing delay, maxed at one second. + * if not stuck, our work is done. + */ + if ((dd->flags & QIB_INITTED) && !(ppd->lflags & + (QIBL_IB_AUTONEG_INPROG | QIBL_LINKINIT | QIBL_LINKARMED | + QIBL_LINKACTIVE))) { + if (qib_relock_by_timer) { + if (!(ppd->lflags & QIBL_IB_LINK_DISABLED)) + toggle_7220_rclkrls(dd); + } + /* re-set timer for next check */ + timeoff = cs->relock_interval << 1; + if (timeoff > HZ) + timeoff = HZ; + cs->relock_interval = timeoff; + } else + timeoff = HZ; + mod_timer(&cs->relock_timer, jiffies + timeoff); +} + +void set_7220_relock_poll(struct qib_devdata *dd, int ibup) +{ + struct qib_chip_specific *cs = dd->cspec; + + if (ibup) { + /* We are now up, relax timer to 1 second interval */ + if (cs->relock_timer_active) { + cs->relock_interval = HZ; + mod_timer(&cs->relock_timer, jiffies + HZ); + } + } else { + /* Transition to down, (re-)set timer to short interval. */ + unsigned int timeout; + + timeout = msecs_to_jiffies(RELOCK_FIRST_MS); + if (timeout == 0) + timeout = 1; + /* If timer has not yet been started, do so. */ + if (!cs->relock_timer_active) { + cs->relock_timer_active = 1; + init_timer(&cs->relock_timer); + cs->relock_timer.function = qib_run_relock; + cs->relock_timer.data = (unsigned long) dd; + cs->relock_interval = timeout; + cs->relock_timer.expires = jiffies + timeout; + add_timer(&cs->relock_timer); + } else { + cs->relock_interval = timeout; + mod_timer(&cs->relock_timer, jiffies + timeout); + } + } +} diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c new file mode 100644 index 00000000..12a96043 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -0,0 +1,976 @@ +/* + * Copyright (c) 2007, 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "qib.h" +#include "qib_common.h" + +/* default pio off, sdma on */ +static ushort sdma_descq_cnt = 256; +module_param_named(sdma_descq_cnt, sdma_descq_cnt, ushort, S_IRUGO); +MODULE_PARM_DESC(sdma_descq_cnt, "Number of SDMA descq entries"); + +/* + * Bits defined in the send DMA descriptor. + */ +#define SDMA_DESC_LAST (1ULL << 11) +#define SDMA_DESC_FIRST (1ULL << 12) +#define SDMA_DESC_DMA_HEAD (1ULL << 13) +#define SDMA_DESC_USE_LARGE_BUF (1ULL << 14) +#define SDMA_DESC_INTR (1ULL << 15) +#define SDMA_DESC_COUNT_LSB 16 +#define SDMA_DESC_GEN_LSB 30 + +char *qib_sdma_state_names[] = { + [qib_sdma_state_s00_hw_down] = "s00_HwDown", + [qib_sdma_state_s10_hw_start_up_wait] = "s10_HwStartUpWait", + [qib_sdma_state_s20_idle] = "s20_Idle", + [qib_sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait", + [qib_sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait", + [qib_sdma_state_s50_hw_halt_wait] = "s50_HwHaltWait", + [qib_sdma_state_s99_running] = "s99_Running", +}; + +char *qib_sdma_event_names[] = { + [qib_sdma_event_e00_go_hw_down] = "e00_GoHwDown", + [qib_sdma_event_e10_go_hw_start] = "e10_GoHwStart", + [qib_sdma_event_e20_hw_started] = "e20_HwStarted", + [qib_sdma_event_e30_go_running] = "e30_GoRunning", + [qib_sdma_event_e40_sw_cleaned] = "e40_SwCleaned", + [qib_sdma_event_e50_hw_cleaned] = "e50_HwCleaned", + [qib_sdma_event_e60_hw_halted] = "e60_HwHalted", + [qib_sdma_event_e70_go_idle] = "e70_GoIdle", + [qib_sdma_event_e7220_err_halted] = "e7220_ErrHalted", + [qib_sdma_event_e7322_err_halted] = "e7322_ErrHalted", + [qib_sdma_event_e90_timer_tick] = "e90_TimerTick", +}; + +/* declare all statics here rather than keep sorting */ +static int alloc_sdma(struct qib_pportdata *); +static void sdma_complete(struct kref *); +static void sdma_finalput(struct qib_sdma_state *); +static void sdma_get(struct qib_sdma_state *); +static void sdma_put(struct qib_sdma_state *); +static void sdma_set_state(struct qib_pportdata *, enum qib_sdma_states); +static void sdma_start_sw_clean_up(struct qib_pportdata *); +static void sdma_sw_clean_up_task(unsigned long); +static void unmap_desc(struct qib_pportdata *, unsigned); + +static void sdma_get(struct qib_sdma_state *ss) +{ + kref_get(&ss->kref); +} + +static void sdma_complete(struct kref *kref) +{ + struct qib_sdma_state *ss = + container_of(kref, struct qib_sdma_state, kref); + + complete(&ss->comp); +} + +static void sdma_put(struct qib_sdma_state *ss) +{ + kref_put(&ss->kref, sdma_complete); +} + +static void sdma_finalput(struct qib_sdma_state *ss) +{ + sdma_put(ss); + wait_for_completion(&ss->comp); +} + +/* + * Complete all the sdma requests on the active list, in the correct + * order, and with appropriate processing. Called when cleaning up + * after sdma shutdown, and when new sdma requests are submitted for + * a link that is down. This matches what is done for requests + * that complete normally, it's just the full list. + * + * Must be called with sdma_lock held + */ +static void clear_sdma_activelist(struct qib_pportdata *ppd) +{ + struct qib_sdma_txreq *txp, *txp_next; + + list_for_each_entry_safe(txp, txp_next, &ppd->sdma_activelist, list) { + list_del_init(&txp->list); + if (txp->flags & QIB_SDMA_TXREQ_F_FREEDESC) { + unsigned idx; + + idx = txp->start_idx; + while (idx != txp->next_descq_idx) { + unmap_desc(ppd, idx); + if (++idx == ppd->sdma_descq_cnt) + idx = 0; + } + } + if (txp->callback) + (*txp->callback)(txp, QIB_SDMA_TXREQ_S_ABORTED); + } +} + +static void sdma_sw_clean_up_task(unsigned long opaque) +{ + struct qib_pportdata *ppd = (struct qib_pportdata *) opaque; + unsigned long flags; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + + /* + * At this point, the following should always be true: + * - We are halted, so no more descriptors are getting retired. + * - We are not running, so no one is submitting new work. + * - Only we can send the e40_sw_cleaned, so we can't start + * running again until we say so. So, the active list and + * descq are ours to play with. + */ + + /* Process all retired requests. */ + qib_sdma_make_progress(ppd); + + clear_sdma_activelist(ppd); + + /* + * Resync count of added and removed. It is VERY important that + * sdma_descq_removed NEVER decrement - user_sdma depends on it. + */ + ppd->sdma_descq_removed = ppd->sdma_descq_added; + + /* + * Reset our notion of head and tail. + * Note that the HW registers will be reset when switching states + * due to calling __qib_sdma_process_event() below. + */ + ppd->sdma_descq_tail = 0; + ppd->sdma_descq_head = 0; + ppd->sdma_head_dma[0] = 0; + ppd->sdma_generation = 0; + + __qib_sdma_process_event(ppd, qib_sdma_event_e40_sw_cleaned); + + spin_unlock_irqrestore(&ppd->sdma_lock, flags); +} + +/* + * This is called when changing to state qib_sdma_state_s10_hw_start_up_wait + * as a result of send buffer errors or send DMA descriptor errors. + * We want to disarm the buffers in these cases. + */ +static void sdma_hw_start_up(struct qib_pportdata *ppd) +{ + struct qib_sdma_state *ss = &ppd->sdma_state; + unsigned bufno; + + for (bufno = ss->first_sendbuf; bufno < ss->last_sendbuf; ++bufno) + ppd->dd->f_sendctrl(ppd, QIB_SENDCTRL_DISARM_BUF(bufno)); + + ppd->dd->f_sdma_hw_start_up(ppd); +} + +static void sdma_sw_tear_down(struct qib_pportdata *ppd) +{ + struct qib_sdma_state *ss = &ppd->sdma_state; + + /* Releasing this reference means the state machine has stopped. */ + sdma_put(ss); +} + +static void sdma_start_sw_clean_up(struct qib_pportdata *ppd) +{ + tasklet_hi_schedule(&ppd->sdma_sw_clean_up_task); +} + +static void sdma_set_state(struct qib_pportdata *ppd, + enum qib_sdma_states next_state) +{ + struct qib_sdma_state *ss = &ppd->sdma_state; + struct sdma_set_state_action *action = ss->set_state_action; + unsigned op = 0; + + /* debugging bookkeeping */ + ss->previous_state = ss->current_state; + ss->previous_op = ss->current_op; + + ss->current_state = next_state; + + if (action[next_state].op_enable) + op |= QIB_SDMA_SENDCTRL_OP_ENABLE; + + if (action[next_state].op_intenable) + op |= QIB_SDMA_SENDCTRL_OP_INTENABLE; + + if (action[next_state].op_halt) + op |= QIB_SDMA_SENDCTRL_OP_HALT; + + if (action[next_state].op_drain) + op |= QIB_SDMA_SENDCTRL_OP_DRAIN; + + if (action[next_state].go_s99_running_tofalse) + ss->go_s99_running = 0; + + if (action[next_state].go_s99_running_totrue) + ss->go_s99_running = 1; + + ss->current_op = op; + + ppd->dd->f_sdma_sendctrl(ppd, ss->current_op); +} + +static void unmap_desc(struct qib_pportdata *ppd, unsigned head) +{ + __le64 *descqp = &ppd->sdma_descq[head].qw[0]; + u64 desc[2]; + dma_addr_t addr; + size_t len; + + desc[0] = le64_to_cpu(descqp[0]); + desc[1] = le64_to_cpu(descqp[1]); + + addr = (desc[1] << 32) | (desc[0] >> 32); + len = (desc[0] >> 14) & (0x7ffULL << 2); + dma_unmap_single(&ppd->dd->pcidev->dev, addr, len, DMA_TO_DEVICE); +} + +static int alloc_sdma(struct qib_pportdata *ppd) +{ + ppd->sdma_descq_cnt = sdma_descq_cnt; + if (!ppd->sdma_descq_cnt) + ppd->sdma_descq_cnt = 256; + + /* Allocate memory for SendDMA descriptor FIFO */ + ppd->sdma_descq = dma_alloc_coherent(&ppd->dd->pcidev->dev, + ppd->sdma_descq_cnt * sizeof(u64[2]), &ppd->sdma_descq_phys, + GFP_KERNEL); + + if (!ppd->sdma_descq) { + qib_dev_err(ppd->dd, "failed to allocate SendDMA descriptor " + "FIFO memory\n"); + goto bail; + } + + /* Allocate memory for DMA of head register to memory */ + ppd->sdma_head_dma = dma_alloc_coherent(&ppd->dd->pcidev->dev, + PAGE_SIZE, &ppd->sdma_head_phys, GFP_KERNEL); + if (!ppd->sdma_head_dma) { + qib_dev_err(ppd->dd, "failed to allocate SendDMA " + "head memory\n"); + goto cleanup_descq; + } + ppd->sdma_head_dma[0] = 0; + return 0; + +cleanup_descq: + dma_free_coherent(&ppd->dd->pcidev->dev, + ppd->sdma_descq_cnt * sizeof(u64[2]), (void *)ppd->sdma_descq, + ppd->sdma_descq_phys); + ppd->sdma_descq = NULL; + ppd->sdma_descq_phys = 0; +bail: + ppd->sdma_descq_cnt = 0; + return -ENOMEM; +} + +static void free_sdma(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + + if (ppd->sdma_head_dma) { + dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, + (void *)ppd->sdma_head_dma, + ppd->sdma_head_phys); + ppd->sdma_head_dma = NULL; + ppd->sdma_head_phys = 0; + } + + if (ppd->sdma_descq) { + dma_free_coherent(&dd->pcidev->dev, + ppd->sdma_descq_cnt * sizeof(u64[2]), + ppd->sdma_descq, ppd->sdma_descq_phys); + ppd->sdma_descq = NULL; + ppd->sdma_descq_phys = 0; + } +} + +static inline void make_sdma_desc(struct qib_pportdata *ppd, + u64 *sdmadesc, u64 addr, u64 dwlen, + u64 dwoffset) +{ + + WARN_ON(addr & 3); + /* SDmaPhyAddr[47:32] */ + sdmadesc[1] = addr >> 32; + /* SDmaPhyAddr[31:0] */ + sdmadesc[0] = (addr & 0xfffffffcULL) << 32; + /* SDmaGeneration[1:0] */ + sdmadesc[0] |= (ppd->sdma_generation & 3ULL) << + SDMA_DESC_GEN_LSB; + /* SDmaDwordCount[10:0] */ + sdmadesc[0] |= (dwlen & 0x7ffULL) << SDMA_DESC_COUNT_LSB; + /* SDmaBufOffset[12:2] */ + sdmadesc[0] |= dwoffset & 0x7ffULL; +} + +/* sdma_lock must be held */ +int qib_sdma_make_progress(struct qib_pportdata *ppd) +{ + struct list_head *lp = NULL; + struct qib_sdma_txreq *txp = NULL; + struct qib_devdata *dd = ppd->dd; + int progress = 0; + u16 hwhead; + u16 idx = 0; + + hwhead = dd->f_sdma_gethead(ppd); + + /* The reason for some of the complexity of this code is that + * not all descriptors have corresponding txps. So, we have to + * be able to skip over descs until we wander into the range of + * the next txp on the list. + */ + + if (!list_empty(&ppd->sdma_activelist)) { + lp = ppd->sdma_activelist.next; + txp = list_entry(lp, struct qib_sdma_txreq, list); + idx = txp->start_idx; + } + + while (ppd->sdma_descq_head != hwhead) { + /* if desc is part of this txp, unmap if needed */ + if (txp && (txp->flags & QIB_SDMA_TXREQ_F_FREEDESC) && + (idx == ppd->sdma_descq_head)) { + unmap_desc(ppd, ppd->sdma_descq_head); + if (++idx == ppd->sdma_descq_cnt) + idx = 0; + } + + /* increment dequed desc count */ + ppd->sdma_descq_removed++; + + /* advance head, wrap if needed */ + if (++ppd->sdma_descq_head == ppd->sdma_descq_cnt) + ppd->sdma_descq_head = 0; + + /* if now past this txp's descs, do the callback */ + if (txp && txp->next_descq_idx == ppd->sdma_descq_head) { + /* remove from active list */ + list_del_init(&txp->list); + if (txp->callback) + (*txp->callback)(txp, QIB_SDMA_TXREQ_S_OK); + /* see if there is another txp */ + if (list_empty(&ppd->sdma_activelist)) + txp = NULL; + else { + lp = ppd->sdma_activelist.next; + txp = list_entry(lp, struct qib_sdma_txreq, + list); + idx = txp->start_idx; + } + } + progress = 1; + } + if (progress) + qib_verbs_sdma_desc_avail(ppd, qib_sdma_descq_freecnt(ppd)); + return progress; +} + +/* + * This is called from interrupt context. + */ +void qib_sdma_intr(struct qib_pportdata *ppd) +{ + unsigned long flags; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + + __qib_sdma_intr(ppd); + + spin_unlock_irqrestore(&ppd->sdma_lock, flags); +} + +void __qib_sdma_intr(struct qib_pportdata *ppd) +{ + if (__qib_sdma_running(ppd)) + qib_sdma_make_progress(ppd); +} + +int qib_setup_sdma(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + int ret = 0; + + ret = alloc_sdma(ppd); + if (ret) + goto bail; + + /* set consistent sdma state */ + ppd->dd->f_sdma_init_early(ppd); + spin_lock_irqsave(&ppd->sdma_lock, flags); + sdma_set_state(ppd, qib_sdma_state_s00_hw_down); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + + /* set up reference counting */ + kref_init(&ppd->sdma_state.kref); + init_completion(&ppd->sdma_state.comp); + + ppd->sdma_generation = 0; + ppd->sdma_descq_head = 0; + ppd->sdma_descq_removed = 0; + ppd->sdma_descq_added = 0; + + INIT_LIST_HEAD(&ppd->sdma_activelist); + + tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task, + (unsigned long)ppd); + + ret = dd->f_init_sdma_regs(ppd); + if (ret) + goto bail_alloc; + + qib_sdma_process_event(ppd, qib_sdma_event_e10_go_hw_start); + + return 0; + +bail_alloc: + qib_teardown_sdma(ppd); +bail: + return ret; +} + +void qib_teardown_sdma(struct qib_pportdata *ppd) +{ + qib_sdma_process_event(ppd, qib_sdma_event_e00_go_hw_down); + + /* + * This waits for the state machine to exit so it is not + * necessary to kill the sdma_sw_clean_up_task to make sure + * it is not running. + */ + sdma_finalput(&ppd->sdma_state); + + free_sdma(ppd); +} + +int qib_sdma_running(struct qib_pportdata *ppd) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + ret = __qib_sdma_running(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + + return ret; +} + +/* + * Complete a request when sdma not running; likely only request + * but to simplify the code, always queue it, then process the full + * activelist. We process the entire list to ensure that this particular + * request does get it's callback, but in the correct order. + * Must be called with sdma_lock held + */ +static void complete_sdma_err_req(struct qib_pportdata *ppd, + struct qib_verbs_txreq *tx) +{ + atomic_inc(&tx->qp->s_dma_busy); + /* no sdma descriptors, so no unmap_desc */ + tx->txreq.start_idx = 0; + tx->txreq.next_descq_idx = 0; + list_add_tail(&tx->txreq.list, &ppd->sdma_activelist); + clear_sdma_activelist(ppd); +} + +/* + * This function queues one IB packet onto the send DMA queue per call. + * The caller is responsible for checking: + * 1) The number of send DMA descriptor entries is less than the size of + * the descriptor queue. + * 2) The IB SGE addresses and lengths are 32-bit aligned + * (except possibly the last SGE's length) + * 3) The SGE addresses are suitable for passing to dma_map_single(). + */ +int qib_sdma_verbs_send(struct qib_pportdata *ppd, + struct qib_sge_state *ss, u32 dwords, + struct qib_verbs_txreq *tx) +{ + unsigned long flags; + struct qib_sge *sge; + struct qib_qp *qp; + int ret = 0; + u16 tail; + __le64 *descqp; + u64 sdmadesc[2]; + u32 dwoffset; + dma_addr_t addr; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + +retry: + if (unlikely(!__qib_sdma_running(ppd))) { + complete_sdma_err_req(ppd, tx); + goto unlock; + } + + if (tx->txreq.sg_count > qib_sdma_descq_freecnt(ppd)) { + if (qib_sdma_make_progress(ppd)) + goto retry; + if (ppd->dd->flags & QIB_HAS_SDMA_TIMEOUT) + ppd->dd->f_sdma_set_desc_cnt(ppd, + ppd->sdma_descq_cnt / 2); + goto busy; + } + + dwoffset = tx->hdr_dwords; + make_sdma_desc(ppd, sdmadesc, (u64) tx->txreq.addr, dwoffset, 0); + + sdmadesc[0] |= SDMA_DESC_FIRST; + if (tx->txreq.flags & QIB_SDMA_TXREQ_F_USELARGEBUF) + sdmadesc[0] |= SDMA_DESC_USE_LARGE_BUF; + + /* write to the descq */ + tail = ppd->sdma_descq_tail; + descqp = &ppd->sdma_descq[tail].qw[0]; + *descqp++ = cpu_to_le64(sdmadesc[0]); + *descqp++ = cpu_to_le64(sdmadesc[1]); + + /* increment the tail */ + if (++tail == ppd->sdma_descq_cnt) { + tail = 0; + descqp = &ppd->sdma_descq[0].qw[0]; + ++ppd->sdma_generation; + } + + tx->txreq.start_idx = tail; + + sge = &ss->sge; + while (dwords) { + u32 dw; + u32 len; + + len = dwords << 2; + if (len > sge->length) + len = sge->length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + dw = (len + 3) >> 2; + addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr, + dw << 2, DMA_TO_DEVICE); + if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) + goto unmap; + sdmadesc[0] = 0; + make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset); + /* SDmaUseLargeBuf has to be set in every descriptor */ + if (tx->txreq.flags & QIB_SDMA_TXREQ_F_USELARGEBUF) + sdmadesc[0] |= SDMA_DESC_USE_LARGE_BUF; + /* write to the descq */ + *descqp++ = cpu_to_le64(sdmadesc[0]); + *descqp++ = cpu_to_le64(sdmadesc[1]); + + /* increment the tail */ + if (++tail == ppd->sdma_descq_cnt) { + tail = 0; + descqp = &ppd->sdma_descq[0].qw[0]; + ++ppd->sdma_generation; + } + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { + if (++sge->n >= QIB_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + + dwoffset += dw; + dwords -= dw; + } + + if (!tail) + descqp = &ppd->sdma_descq[ppd->sdma_descq_cnt].qw[0]; + descqp -= 2; + descqp[0] |= cpu_to_le64(SDMA_DESC_LAST); + if (tx->txreq.flags & QIB_SDMA_TXREQ_F_HEADTOHOST) + descqp[0] |= cpu_to_le64(SDMA_DESC_DMA_HEAD); + if (tx->txreq.flags & QIB_SDMA_TXREQ_F_INTREQ) + descqp[0] |= cpu_to_le64(SDMA_DESC_INTR); + + atomic_inc(&tx->qp->s_dma_busy); + tx->txreq.next_descq_idx = tail; + ppd->dd->f_sdma_update_tail(ppd, tail); + ppd->sdma_descq_added += tx->txreq.sg_count; + list_add_tail(&tx->txreq.list, &ppd->sdma_activelist); + goto unlock; + +unmap: + for (;;) { + if (!tail) + tail = ppd->sdma_descq_cnt - 1; + else + tail--; + if (tail == ppd->sdma_descq_tail) + break; + unmap_desc(ppd, tail); + } + qp = tx->qp; + qib_put_txreq(tx); + spin_lock(&qp->r_lock); + spin_lock(&qp->s_lock); + if (qp->ibqp.qp_type == IB_QPT_RC) { + /* XXX what about error sending RDMA read responses? */ + if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) + qib_error_qp(qp, IB_WC_GENERAL_ERR); + } else if (qp->s_wqe) + qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); + spin_unlock(&qp->s_lock); + spin_unlock(&qp->r_lock); + /* return zero to process the next send work request */ + goto unlock; + +busy: + qp = tx->qp; + spin_lock(&qp->s_lock); + if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { + struct qib_ibdev *dev; + + /* + * If we couldn't queue the DMA request, save the info + * and try again later rather than destroying the + * buffer and undoing the side effects of the copy. + */ + tx->ss = ss; + tx->dwords = dwords; + qp->s_tx = tx; + dev = &ppd->dd->verbs_dev; + spin_lock(&dev->pending_lock); + if (list_empty(&qp->iowait)) { + struct qib_ibport *ibp; + + ibp = &ppd->ibport_data; + ibp->n_dmawait++; + qp->s_flags |= QIB_S_WAIT_DMA_DESC; + list_add_tail(&qp->iowait, &dev->dmawait); + } + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~QIB_S_BUSY; + spin_unlock(&qp->s_lock); + ret = -EBUSY; + } else { + spin_unlock(&qp->s_lock); + qib_put_txreq(tx); + } +unlock: + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return ret; +} + +void qib_sdma_process_event(struct qib_pportdata *ppd, + enum qib_sdma_events event) +{ + unsigned long flags; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + + __qib_sdma_process_event(ppd, event); + + if (ppd->sdma_state.current_state == qib_sdma_state_s99_running) + qib_verbs_sdma_desc_avail(ppd, qib_sdma_descq_freecnt(ppd)); + + spin_unlock_irqrestore(&ppd->sdma_lock, flags); +} + +void __qib_sdma_process_event(struct qib_pportdata *ppd, + enum qib_sdma_events event) +{ + struct qib_sdma_state *ss = &ppd->sdma_state; + + switch (ss->current_state) { + case qib_sdma_state_s00_hw_down: + switch (event) { + case qib_sdma_event_e00_go_hw_down: + break; + case qib_sdma_event_e30_go_running: + /* + * If down, but running requested (usually result + * of link up, then we need to start up. + * This can happen when hw down is requested while + * bringing the link up with traffic active on + * 7220, e.g. */ + ss->go_s99_running = 1; + /* fall through and start dma engine */ + case qib_sdma_event_e10_go_hw_start: + /* This reference means the state machine is started */ + sdma_get(&ppd->sdma_state); + sdma_set_state(ppd, + qib_sdma_state_s10_hw_start_up_wait); + break; + case qib_sdma_event_e20_hw_started: + break; + case qib_sdma_event_e40_sw_cleaned: + sdma_sw_tear_down(ppd); + break; + case qib_sdma_event_e50_hw_cleaned: + break; + case qib_sdma_event_e60_hw_halted: + break; + case qib_sdma_event_e70_go_idle: + break; + case qib_sdma_event_e7220_err_halted: + break; + case qib_sdma_event_e7322_err_halted: + break; + case qib_sdma_event_e90_timer_tick: + break; + } + break; + + case qib_sdma_state_s10_hw_start_up_wait: + switch (event) { + case qib_sdma_event_e00_go_hw_down: + sdma_set_state(ppd, qib_sdma_state_s00_hw_down); + sdma_sw_tear_down(ppd); + break; + case qib_sdma_event_e10_go_hw_start: + break; + case qib_sdma_event_e20_hw_started: + sdma_set_state(ppd, ss->go_s99_running ? + qib_sdma_state_s99_running : + qib_sdma_state_s20_idle); + break; + case qib_sdma_event_e30_go_running: + ss->go_s99_running = 1; + break; + case qib_sdma_event_e40_sw_cleaned: + break; + case qib_sdma_event_e50_hw_cleaned: + break; + case qib_sdma_event_e60_hw_halted: + break; + case qib_sdma_event_e70_go_idle: + ss->go_s99_running = 0; + break; + case qib_sdma_event_e7220_err_halted: + break; + case qib_sdma_event_e7322_err_halted: + break; + case qib_sdma_event_e90_timer_tick: + break; + } + break; + + case qib_sdma_state_s20_idle: + switch (event) { + case qib_sdma_event_e00_go_hw_down: + sdma_set_state(ppd, qib_sdma_state_s00_hw_down); + sdma_sw_tear_down(ppd); + break; + case qib_sdma_event_e10_go_hw_start: + break; + case qib_sdma_event_e20_hw_started: + break; + case qib_sdma_event_e30_go_running: + sdma_set_state(ppd, qib_sdma_state_s99_running); + ss->go_s99_running = 1; + break; + case qib_sdma_event_e40_sw_cleaned: + break; + case qib_sdma_event_e50_hw_cleaned: + break; + case qib_sdma_event_e60_hw_halted: + break; + case qib_sdma_event_e70_go_idle: + break; + case qib_sdma_event_e7220_err_halted: + break; + case qib_sdma_event_e7322_err_halted: + break; + case qib_sdma_event_e90_timer_tick: + break; + } + break; + + case qib_sdma_state_s30_sw_clean_up_wait: + switch (event) { + case qib_sdma_event_e00_go_hw_down: + sdma_set_state(ppd, qib_sdma_state_s00_hw_down); + break; + case qib_sdma_event_e10_go_hw_start: + break; + case qib_sdma_event_e20_hw_started: + break; + case qib_sdma_event_e30_go_running: + ss->go_s99_running = 1; + break; + case qib_sdma_event_e40_sw_cleaned: + sdma_set_state(ppd, + qib_sdma_state_s10_hw_start_up_wait); + sdma_hw_start_up(ppd); + break; + case qib_sdma_event_e50_hw_cleaned: + break; + case qib_sdma_event_e60_hw_halted: + break; + case qib_sdma_event_e70_go_idle: + ss->go_s99_running = 0; + break; + case qib_sdma_event_e7220_err_halted: + break; + case qib_sdma_event_e7322_err_halted: + break; + case qib_sdma_event_e90_timer_tick: + break; + } + break; + + case qib_sdma_state_s40_hw_clean_up_wait: + switch (event) { + case qib_sdma_event_e00_go_hw_down: + sdma_set_state(ppd, qib_sdma_state_s00_hw_down); + sdma_start_sw_clean_up(ppd); + break; + case qib_sdma_event_e10_go_hw_start: + break; + case qib_sdma_event_e20_hw_started: + break; + case qib_sdma_event_e30_go_running: + ss->go_s99_running = 1; + break; + case qib_sdma_event_e40_sw_cleaned: + break; + case qib_sdma_event_e50_hw_cleaned: + sdma_set_state(ppd, + qib_sdma_state_s30_sw_clean_up_wait); + sdma_start_sw_clean_up(ppd); + break; + case qib_sdma_event_e60_hw_halted: + break; + case qib_sdma_event_e70_go_idle: + ss->go_s99_running = 0; + break; + case qib_sdma_event_e7220_err_halted: + break; + case qib_sdma_event_e7322_err_halted: + break; + case qib_sdma_event_e90_timer_tick: + break; + } + break; + + case qib_sdma_state_s50_hw_halt_wait: + switch (event) { + case qib_sdma_event_e00_go_hw_down: + sdma_set_state(ppd, qib_sdma_state_s00_hw_down); + sdma_start_sw_clean_up(ppd); + break; + case qib_sdma_event_e10_go_hw_start: + break; + case qib_sdma_event_e20_hw_started: + break; + case qib_sdma_event_e30_go_running: + ss->go_s99_running = 1; + break; + case qib_sdma_event_e40_sw_cleaned: + break; + case qib_sdma_event_e50_hw_cleaned: + break; + case qib_sdma_event_e60_hw_halted: + sdma_set_state(ppd, + qib_sdma_state_s40_hw_clean_up_wait); + ppd->dd->f_sdma_hw_clean_up(ppd); + break; + case qib_sdma_event_e70_go_idle: + ss->go_s99_running = 0; + break; + case qib_sdma_event_e7220_err_halted: + break; + case qib_sdma_event_e7322_err_halted: + break; + case qib_sdma_event_e90_timer_tick: + break; + } + break; + + case qib_sdma_state_s99_running: + switch (event) { + case qib_sdma_event_e00_go_hw_down: + sdma_set_state(ppd, qib_sdma_state_s00_hw_down); + sdma_start_sw_clean_up(ppd); + break; + case qib_sdma_event_e10_go_hw_start: + break; + case qib_sdma_event_e20_hw_started: + break; + case qib_sdma_event_e30_go_running: + break; + case qib_sdma_event_e40_sw_cleaned: + break; + case qib_sdma_event_e50_hw_cleaned: + break; + case qib_sdma_event_e60_hw_halted: + sdma_set_state(ppd, + qib_sdma_state_s30_sw_clean_up_wait); + sdma_start_sw_clean_up(ppd); + break; + case qib_sdma_event_e70_go_idle: + sdma_set_state(ppd, qib_sdma_state_s50_hw_halt_wait); + ss->go_s99_running = 0; + break; + case qib_sdma_event_e7220_err_halted: + sdma_set_state(ppd, + qib_sdma_state_s30_sw_clean_up_wait); + sdma_start_sw_clean_up(ppd); + break; + case qib_sdma_event_e7322_err_halted: + sdma_set_state(ppd, qib_sdma_state_s50_hw_halt_wait); + break; + case qib_sdma_event_e90_timer_tick: + break; + } + break; + } + + ss->last_event = event; +} diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c new file mode 100644 index 00000000..d6235931 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_srq.c @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "qib_verbs.h" + +/** + * qib_post_srq_receive - post a receive on a shared receive queue + * @ibsrq: the SRQ to post the receive on + * @wr: the list of work requests to post + * @bad_wr: A pointer to the first WR to cause a problem is put here + * + * This may be called from interrupt context. + */ +int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct qib_srq *srq = to_isrq(ibsrq); + struct qib_rwq *wq; + unsigned long flags; + int ret; + + for (; wr; wr = wr->next) { + struct qib_rwqe *wqe; + u32 next; + int i; + + if ((unsigned) wr->num_sge > srq->rq.max_sge) { + *bad_wr = wr; + ret = -EINVAL; + goto bail; + } + + spin_lock_irqsave(&srq->rq.lock, flags); + wq = srq->rq.wq; + next = wq->head + 1; + if (next >= srq->rq.size) + next = 0; + if (next == wq->tail) { + spin_unlock_irqrestore(&srq->rq.lock, flags); + *bad_wr = wr; + ret = -ENOMEM; + goto bail; + } + + wqe = get_rwqe_ptr(&srq->rq, wq->head); + wqe->wr_id = wr->wr_id; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); + wq->head = next; + spin_unlock_irqrestore(&srq->rq.lock, flags); + } + ret = 0; + +bail: + return ret; +} + +/** + * qib_create_srq - create a shared receive queue + * @ibpd: the protection domain of the SRQ to create + * @srq_init_attr: the attributes of the SRQ + * @udata: data from libibverbs when creating a user SRQ + */ +struct ib_srq *qib_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) +{ + struct qib_ibdev *dev = to_idev(ibpd->device); + struct qib_srq *srq; + u32 sz; + struct ib_srq *ret; + + if (srq_init_attr->srq_type != IB_SRQT_BASIC) { + ret = ERR_PTR(-ENOSYS); + goto done; + } + + if (srq_init_attr->attr.max_sge == 0 || + srq_init_attr->attr.max_sge > ib_qib_max_srq_sges || + srq_init_attr->attr.max_wr == 0 || + srq_init_attr->attr.max_wr > ib_qib_max_srq_wrs) { + ret = ERR_PTR(-EINVAL); + goto done; + } + + srq = kmalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) { + ret = ERR_PTR(-ENOMEM); + goto done; + } + + /* + * Need to use vmalloc() if we want to support large #s of entries. + */ + srq->rq.size = srq_init_attr->attr.max_wr + 1; + srq->rq.max_sge = srq_init_attr->attr.max_sge; + sz = sizeof(struct ib_sge) * srq->rq.max_sge + + sizeof(struct qib_rwqe); + srq->rq.wq = vmalloc_user(sizeof(struct qib_rwq) + srq->rq.size * sz); + if (!srq->rq.wq) { + ret = ERR_PTR(-ENOMEM); + goto bail_srq; + } + + /* + * Return the address of the RWQ as the offset to mmap. + * See qib_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + int err; + u32 s = sizeof(struct qib_rwq) + srq->rq.size * sz; + + srq->ip = + qib_create_mmap_info(dev, s, ibpd->uobject->context, + srq->rq.wq); + if (!srq->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wq; + } + + err = ib_copy_to_udata(udata, &srq->ip->offset, + sizeof(srq->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } else + srq->ip = NULL; + + /* + * ib_create_srq() will initialize srq->ibsrq. + */ + spin_lock_init(&srq->rq.lock); + srq->rq.wq->head = 0; + srq->rq.wq->tail = 0; + srq->limit = srq_init_attr->attr.srq_limit; + + spin_lock(&dev->n_srqs_lock); + if (dev->n_srqs_allocated == ib_qib_max_srqs) { + spin_unlock(&dev->n_srqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_srqs_allocated++; + spin_unlock(&dev->n_srqs_lock); + + if (srq->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + ret = &srq->ibsrq; + goto done; + +bail_ip: + kfree(srq->ip); +bail_wq: + vfree(srq->rq.wq); +bail_srq: + kfree(srq); +done: + return ret; +} + +/** + * qib_modify_srq - modify a shared receive queue + * @ibsrq: the SRQ to modify + * @attr: the new attributes of the SRQ + * @attr_mask: indicates which attributes to modify + * @udata: user data for libibverbs.so + */ +int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, + struct ib_udata *udata) +{ + struct qib_srq *srq = to_isrq(ibsrq); + struct qib_rwq *wq; + int ret = 0; + + if (attr_mask & IB_SRQ_MAX_WR) { + struct qib_rwq *owq; + struct qib_rwqe *p; + u32 sz, size, n, head, tail; + + /* Check that the requested sizes are below the limits. */ + if ((attr->max_wr > ib_qib_max_srq_wrs) || + ((attr_mask & IB_SRQ_LIMIT) ? + attr->srq_limit : srq->limit) > attr->max_wr) { + ret = -EINVAL; + goto bail; + } + + sz = sizeof(struct qib_rwqe) + + srq->rq.max_sge * sizeof(struct ib_sge); + size = attr->max_wr + 1; + wq = vmalloc_user(sizeof(struct qib_rwq) + size * sz); + if (!wq) { + ret = -ENOMEM; + goto bail; + } + + /* Check that we can write the offset to mmap. */ + if (udata && udata->inlen >= sizeof(__u64)) { + __u64 offset_addr; + __u64 offset = 0; + + ret = ib_copy_from_udata(&offset_addr, udata, + sizeof(offset_addr)); + if (ret) + goto bail_free; + udata->outbuf = + (void __user *) (unsigned long) offset_addr; + ret = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (ret) + goto bail_free; + } + + spin_lock_irq(&srq->rq.lock); + /* + * validate head and tail pointer values and compute + * the number of remaining WQEs. + */ + owq = srq->rq.wq; + head = owq->head; + tail = owq->tail; + if (head >= srq->rq.size || tail >= srq->rq.size) { + ret = -EINVAL; + goto bail_unlock; + } + n = head; + if (n < tail) + n += srq->rq.size - tail; + else + n -= tail; + if (size <= n) { + ret = -EINVAL; + goto bail_unlock; + } + n = 0; + p = wq->wq; + while (tail != head) { + struct qib_rwqe *wqe; + int i; + + wqe = get_rwqe_ptr(&srq->rq, tail); + p->wr_id = wqe->wr_id; + p->num_sge = wqe->num_sge; + for (i = 0; i < wqe->num_sge; i++) + p->sg_list[i] = wqe->sg_list[i]; + n++; + p = (struct qib_rwqe *)((char *) p + sz); + if (++tail >= srq->rq.size) + tail = 0; + } + srq->rq.wq = wq; + srq->rq.size = size; + wq->head = n; + wq->tail = 0; + if (attr_mask & IB_SRQ_LIMIT) + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); + + vfree(owq); + + if (srq->ip) { + struct qib_mmap_info *ip = srq->ip; + struct qib_ibdev *dev = to_idev(srq->ibsrq.device); + u32 s = sizeof(struct qib_rwq) + size * sz; + + qib_update_mmap_info(dev, ip, s, wq); + + /* + * Return the offset to mmap. + * See qib_mmap() for details. + */ + if (udata && udata->inlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + + /* + * Put user mapping info onto the pending list + * unless it already is on the list. + */ + spin_lock_irq(&dev->pending_lock); + if (list_empty(&ip->pending_mmaps)) + list_add(&ip->pending_mmaps, + &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + } else if (attr_mask & IB_SRQ_LIMIT) { + spin_lock_irq(&srq->rq.lock); + if (attr->srq_limit >= srq->rq.size) + ret = -EINVAL; + else + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); + } + goto bail; + +bail_unlock: + spin_unlock_irq(&srq->rq.lock); +bail_free: + vfree(wq); +bail: + return ret; +} + +int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) +{ + struct qib_srq *srq = to_isrq(ibsrq); + + attr->max_wr = srq->rq.size - 1; + attr->max_sge = srq->rq.max_sge; + attr->srq_limit = srq->limit; + return 0; +} + +/** + * qib_destroy_srq - destroy a shared receive queue + * @ibsrq: the SRQ to destroy + */ +int qib_destroy_srq(struct ib_srq *ibsrq) +{ + struct qib_srq *srq = to_isrq(ibsrq); + struct qib_ibdev *dev = to_idev(ibsrq->device); + + spin_lock(&dev->n_srqs_lock); + dev->n_srqs_allocated--; + spin_unlock(&dev->n_srqs_lock); + if (srq->ip) + kref_put(&srq->ip->ref, qib_release_mmap_info); + else + vfree(srq->rq.wq); + kfree(srq); + + return 0; +} diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c new file mode 100644 index 00000000..dae51604 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -0,0 +1,723 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include + +#include "qib.h" + +/** + * qib_parse_ushort - parse an unsigned short value in an arbitrary base + * @str: the string containing the number + * @valp: where to put the result + * + * Returns the number of bytes consumed, or negative value on error. + */ +static int qib_parse_ushort(const char *str, unsigned short *valp) +{ + unsigned long val; + char *end; + int ret; + + if (!isdigit(str[0])) { + ret = -EINVAL; + goto bail; + } + + val = simple_strtoul(str, &end, 0); + + if (val > 0xffff) { + ret = -EINVAL; + goto bail; + } + + *valp = val; + + ret = end + 1 - str; + if (ret == 0) + ret = -EINVAL; + +bail: + return ret; +} + +/* start of per-port functions */ +/* + * Get/Set heartbeat enable. OR of 1=enabled, 2=auto + */ +static ssize_t show_hrtbt_enb(struct qib_pportdata *ppd, char *buf) +{ + struct qib_devdata *dd = ppd->dd; + int ret; + + ret = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT); + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, + size_t count) +{ + struct qib_devdata *dd = ppd->dd; + int ret; + u16 val; + + ret = qib_parse_ushort(buf, &val); + + /* + * Set the "intentional" heartbeat enable per either of + * "Enable" and "Auto", as these are normally set together. + * This bit is consulted when leaving loopback mode, + * because entering loopback mode overrides it and automatically + * disables heartbeat. + */ + if (ret >= 0) + ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val); + if (ret < 0) + qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n"); + return ret < 0 ? ret : count; +} + +static ssize_t store_loopback(struct qib_pportdata *ppd, const char *buf, + size_t count) +{ + struct qib_devdata *dd = ppd->dd; + int ret = count, r; + + r = dd->f_set_ib_loopback(ppd, buf); + if (r < 0) + ret = r; + + return ret; +} + +static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf, + size_t count) +{ + struct qib_devdata *dd = ppd->dd; + int ret; + u16 val; + + ret = qib_parse_ushort(buf, &val); + if (ret > 0) + qib_set_led_override(ppd, val); + else + qib_dev_err(dd, "attempt to set invalid LED override\n"); + return ret < 0 ? ret : count; +} + +static ssize_t show_status(struct qib_pportdata *ppd, char *buf) +{ + ssize_t ret; + + if (!ppd->statusp) + ret = -EINVAL; + else + ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n", + (unsigned long long) *(ppd->statusp)); + return ret; +} + +/* + * For userland compatibility, these offsets must remain fixed. + * They are strings for QIB_STATUS_* + */ +static const char * const qib_status_str[] = { + "Initted", + "", + "", + "", + "", + "Present", + "IB_link_up", + "IB_configured", + "", + "Fatal_Hardware_Error", + NULL, +}; + +static ssize_t show_status_str(struct qib_pportdata *ppd, char *buf) +{ + int i, any; + u64 s; + ssize_t ret; + + if (!ppd->statusp) { + ret = -EINVAL; + goto bail; + } + + s = *(ppd->statusp); + *buf = '\0'; + for (any = i = 0; s && qib_status_str[i]; i++) { + if (s & 1) { + /* if overflow */ + if (any && strlcat(buf, " ", PAGE_SIZE) >= PAGE_SIZE) + break; + if (strlcat(buf, qib_status_str[i], PAGE_SIZE) >= + PAGE_SIZE) + break; + any = 1; + } + s >>= 1; + } + if (any) + strlcat(buf, "\n", PAGE_SIZE); + + ret = strlen(buf); + +bail: + return ret; +} + +/* end of per-port functions */ + +/* + * Start of per-port file structures and support code + * Because we are fitting into other infrastructure, we have to supply the + * full set of kobject/sysfs_ops structures and routines. + */ +#define QIB_PORT_ATTR(name, mode, show, store) \ + static struct qib_port_attr qib_port_attr_##name = \ + __ATTR(name, mode, show, store) + +struct qib_port_attr { + struct attribute attr; + ssize_t (*show)(struct qib_pportdata *, char *); + ssize_t (*store)(struct qib_pportdata *, const char *, size_t); +}; + +QIB_PORT_ATTR(loopback, S_IWUSR, NULL, store_loopback); +QIB_PORT_ATTR(led_override, S_IWUSR, NULL, store_led_override); +QIB_PORT_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb, + store_hrtbt_enb); +QIB_PORT_ATTR(status, S_IRUGO, show_status, NULL); +QIB_PORT_ATTR(status_str, S_IRUGO, show_status_str, NULL); + +static struct attribute *port_default_attributes[] = { + &qib_port_attr_loopback.attr, + &qib_port_attr_led_override.attr, + &qib_port_attr_hrtbt_enable.attr, + &qib_port_attr_status.attr, + &qib_port_attr_status_str.attr, + NULL +}; + +static ssize_t qib_portattr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct qib_port_attr *pattr = + container_of(attr, struct qib_port_attr, attr); + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, pport_kobj); + + return pattr->show(ppd, buf); +} + +static ssize_t qib_portattr_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t len) +{ + struct qib_port_attr *pattr = + container_of(attr, struct qib_port_attr, attr); + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, pport_kobj); + + return pattr->store(ppd, buf, len); +} + +static void qib_port_release(struct kobject *kobj) +{ + /* nothing to do since memory is freed by qib_free_devdata() */ +} + +static const struct sysfs_ops qib_port_ops = { + .show = qib_portattr_show, + .store = qib_portattr_store, +}; + +static struct kobj_type qib_port_ktype = { + .release = qib_port_release, + .sysfs_ops = &qib_port_ops, + .default_attrs = port_default_attributes +}; + +/* Start sl2vl */ + +#define QIB_SL2VL_ATTR(N) \ + static struct qib_sl2vl_attr qib_sl2vl_attr_##N = { \ + .attr = { .name = __stringify(N), .mode = 0444 }, \ + .sl = N \ + } + +struct qib_sl2vl_attr { + struct attribute attr; + int sl; +}; + +QIB_SL2VL_ATTR(0); +QIB_SL2VL_ATTR(1); +QIB_SL2VL_ATTR(2); +QIB_SL2VL_ATTR(3); +QIB_SL2VL_ATTR(4); +QIB_SL2VL_ATTR(5); +QIB_SL2VL_ATTR(6); +QIB_SL2VL_ATTR(7); +QIB_SL2VL_ATTR(8); +QIB_SL2VL_ATTR(9); +QIB_SL2VL_ATTR(10); +QIB_SL2VL_ATTR(11); +QIB_SL2VL_ATTR(12); +QIB_SL2VL_ATTR(13); +QIB_SL2VL_ATTR(14); +QIB_SL2VL_ATTR(15); + +static struct attribute *sl2vl_default_attributes[] = { + &qib_sl2vl_attr_0.attr, + &qib_sl2vl_attr_1.attr, + &qib_sl2vl_attr_2.attr, + &qib_sl2vl_attr_3.attr, + &qib_sl2vl_attr_4.attr, + &qib_sl2vl_attr_5.attr, + &qib_sl2vl_attr_6.attr, + &qib_sl2vl_attr_7.attr, + &qib_sl2vl_attr_8.attr, + &qib_sl2vl_attr_9.attr, + &qib_sl2vl_attr_10.attr, + &qib_sl2vl_attr_11.attr, + &qib_sl2vl_attr_12.attr, + &qib_sl2vl_attr_13.attr, + &qib_sl2vl_attr_14.attr, + &qib_sl2vl_attr_15.attr, + NULL +}; + +static ssize_t sl2vl_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct qib_sl2vl_attr *sattr = + container_of(attr, struct qib_sl2vl_attr, attr); + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, sl2vl_kobj); + struct qib_ibport *qibp = &ppd->ibport_data; + + return sprintf(buf, "%u\n", qibp->sl_to_vl[sattr->sl]); +} + +static const struct sysfs_ops qib_sl2vl_ops = { + .show = sl2vl_attr_show, +}; + +static struct kobj_type qib_sl2vl_ktype = { + .release = qib_port_release, + .sysfs_ops = &qib_sl2vl_ops, + .default_attrs = sl2vl_default_attributes +}; + +/* End sl2vl */ + +/* Start diag_counters */ + +#define QIB_DIAGC_ATTR(N) \ + static struct qib_diagc_attr qib_diagc_attr_##N = { \ + .attr = { .name = __stringify(N), .mode = 0664 }, \ + .counter = offsetof(struct qib_ibport, n_##N) \ + } + +struct qib_diagc_attr { + struct attribute attr; + size_t counter; +}; + +QIB_DIAGC_ATTR(rc_resends); +QIB_DIAGC_ATTR(rc_acks); +QIB_DIAGC_ATTR(rc_qacks); +QIB_DIAGC_ATTR(rc_delayed_comp); +QIB_DIAGC_ATTR(seq_naks); +QIB_DIAGC_ATTR(rdma_seq); +QIB_DIAGC_ATTR(rnr_naks); +QIB_DIAGC_ATTR(other_naks); +QIB_DIAGC_ATTR(rc_timeouts); +QIB_DIAGC_ATTR(loop_pkts); +QIB_DIAGC_ATTR(pkt_drops); +QIB_DIAGC_ATTR(dmawait); +QIB_DIAGC_ATTR(unaligned); +QIB_DIAGC_ATTR(rc_dupreq); +QIB_DIAGC_ATTR(rc_seqnak); + +static struct attribute *diagc_default_attributes[] = { + &qib_diagc_attr_rc_resends.attr, + &qib_diagc_attr_rc_acks.attr, + &qib_diagc_attr_rc_qacks.attr, + &qib_diagc_attr_rc_delayed_comp.attr, + &qib_diagc_attr_seq_naks.attr, + &qib_diagc_attr_rdma_seq.attr, + &qib_diagc_attr_rnr_naks.attr, + &qib_diagc_attr_other_naks.attr, + &qib_diagc_attr_rc_timeouts.attr, + &qib_diagc_attr_loop_pkts.attr, + &qib_diagc_attr_pkt_drops.attr, + &qib_diagc_attr_dmawait.attr, + &qib_diagc_attr_unaligned.attr, + &qib_diagc_attr_rc_dupreq.attr, + &qib_diagc_attr_rc_seqnak.attr, + NULL +}; + +static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct qib_diagc_attr *dattr = + container_of(attr, struct qib_diagc_attr, attr); + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, diagc_kobj); + struct qib_ibport *qibp = &ppd->ibport_data; + + return sprintf(buf, "%u\n", *(u32 *)((char *)qibp + dattr->counter)); +} + +static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t size) +{ + struct qib_diagc_attr *dattr = + container_of(attr, struct qib_diagc_attr, attr); + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, diagc_kobj); + struct qib_ibport *qibp = &ppd->ibport_data; + char *endp; + long val = simple_strtol(buf, &endp, 0); + + if (val < 0 || endp == buf) + return -EINVAL; + + *(u32 *)((char *) qibp + dattr->counter) = val; + return size; +} + +static const struct sysfs_ops qib_diagc_ops = { + .show = diagc_attr_show, + .store = diagc_attr_store, +}; + +static struct kobj_type qib_diagc_ktype = { + .release = qib_port_release, + .sysfs_ops = &qib_diagc_ops, + .default_attrs = diagc_default_attributes +}; + +/* End diag_counters */ + +/* end of per-port file structures and support code */ + +/* + * Start of per-unit (or driver, in some cases, but replicated + * per unit) functions (these get a device *) + */ +static ssize_t show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + + return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); +} + +static ssize_t show_hca(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + int ret; + + if (!dd->boardname) + ret = -EINVAL; + else + ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); + return ret; +} + +static ssize_t show_version(struct device *device, + struct device_attribute *attr, char *buf) +{ + /* The string printed here is already newline-terminated. */ + return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version); +} + +static ssize_t show_boardversion(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + + /* The string printed here is already newline-terminated. */ + return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); +} + + +static ssize_t show_localbus_info(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + + /* The string printed here is already newline-terminated. */ + return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info); +} + + +static ssize_t show_nctxts(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + + /* Return the number of user ports (contexts) available. */ + return scnprintf(buf, PAGE_SIZE, "%u\n", dd->cfgctxts - + dd->first_user_ctxt); +} + +static ssize_t show_nfreectxts(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + + /* Return the number of free user ports (contexts) available. */ + return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); +} + +static ssize_t show_serial(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + + buf[sizeof dd->serial] = '\0'; + memcpy(buf, dd->serial, sizeof dd->serial); + strcat(buf, "\n"); + return strlen(buf); +} + +static ssize_t store_chip_reset(struct device *device, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + int ret; + + if (count < 5 || memcmp(buf, "reset", 5) || !dd->diag_client) { + ret = -EINVAL; + goto bail; + } + + ret = qib_reset_device(dd->unit); +bail: + return ret < 0 ? ret : count; +} + +static ssize_t show_logged_errs(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + int idx, count; + + /* force consistency with actual EEPROM */ + if (qib_update_eeprom_log(dd) != 0) + return -ENXIO; + + count = 0; + for (idx = 0; idx < QIB_EEP_LOG_CNT; ++idx) { + count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c", + dd->eep_st_errs[idx], + idx == (QIB_EEP_LOG_CNT - 1) ? '\n' : ' '); + } + + return count; +} + +/* + * Dump tempsense regs. in decimal, to ease shell-scripts. + */ +static ssize_t show_tempsense(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + int ret; + int idx; + u8 regvals[8]; + + ret = -ENXIO; + for (idx = 0; idx < 8; ++idx) { + if (idx == 6) + continue; + ret = dd->f_tempsense_rd(dd, idx); + if (ret < 0) + break; + regvals[idx] = ret; + } + if (idx == 8) + ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n", + *(signed char *)(regvals), + *(signed char *)(regvals + 1), + regvals[2], regvals[3], + *(signed char *)(regvals + 5), + *(signed char *)(regvals + 7)); + return ret; +} + +/* + * end of per-unit (or driver, in some cases, but replicated + * per unit) functions + */ + +/* start of per-unit file structures and support code */ +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(version, S_IRUGO, show_version, NULL); +static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL); +static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL); +static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); +static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); +static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL); +static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL); +static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL); +static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset); + +static struct device_attribute *qib_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_hca_type, + &dev_attr_board_id, + &dev_attr_version, + &dev_attr_nctxts, + &dev_attr_nfreectxts, + &dev_attr_serial, + &dev_attr_boardversion, + &dev_attr_logged_errors, + &dev_attr_tempsense, + &dev_attr_localbus_info, + &dev_attr_chip_reset, +}; + +int qib_create_port_files(struct ib_device *ibdev, u8 port_num, + struct kobject *kobj) +{ + struct qib_pportdata *ppd; + struct qib_devdata *dd = dd_from_ibdev(ibdev); + int ret; + + if (!port_num || port_num > dd->num_pports) { + qib_dev_err(dd, "Skipping infiniband class with " + "invalid port %u\n", port_num); + ret = -ENODEV; + goto bail; + } + ppd = &dd->pport[port_num - 1]; + + ret = kobject_init_and_add(&ppd->pport_kobj, &qib_port_ktype, kobj, + "linkcontrol"); + if (ret) { + qib_dev_err(dd, "Skipping linkcontrol sysfs info, " + "(err %d) port %u\n", ret, port_num); + goto bail; + } + kobject_uevent(&ppd->pport_kobj, KOBJ_ADD); + + ret = kobject_init_and_add(&ppd->sl2vl_kobj, &qib_sl2vl_ktype, kobj, + "sl2vl"); + if (ret) { + qib_dev_err(dd, "Skipping sl2vl sysfs info, " + "(err %d) port %u\n", ret, port_num); + goto bail_sl; + } + kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD); + + ret = kobject_init_and_add(&ppd->diagc_kobj, &qib_diagc_ktype, kobj, + "diag_counters"); + if (ret) { + qib_dev_err(dd, "Skipping diag_counters sysfs info, " + "(err %d) port %u\n", ret, port_num); + goto bail_diagc; + } + kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD); + + return 0; + +bail_diagc: + kobject_put(&ppd->sl2vl_kobj); +bail_sl: + kobject_put(&ppd->pport_kobj); +bail: + return ret; +} + +/* + * Register and create our files in /sys/class/infiniband. + */ +int qib_verbs_register_sysfs(struct qib_devdata *dd) +{ + struct ib_device *dev = &dd->verbs_dev.ibdev; + int i, ret; + + for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) { + ret = device_create_file(&dev->dev, qib_attributes[i]); + if (ret) + return ret; + } + + return 0; +} + +/* + * Unregister and remove our files in /sys/class/infiniband. + */ +void qib_verbs_unregister_sysfs(struct qib_devdata *dd) +{ + struct qib_pportdata *ppd; + int i; + + for (i = 0; i < dd->num_pports; i++) { + ppd = &dd->pport[i]; + kobject_put(&ppd->pport_kobj); + kobject_put(&ppd->sl2vl_kobj); + } +} diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c new file mode 100644 index 00000000..ddde72e1 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_twsi.c @@ -0,0 +1,498 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "qib.h" + +/* + * QLogic_IB "Two Wire Serial Interface" driver. + * Originally written for a not-quite-i2c serial eeprom, which is + * still used on some supported boards. Later boards have added a + * variety of other uses, most board-specific, so the bit-boffing + * part has been split off to this file, while the other parts + * have been moved to chip-specific files. + * + * We have also dropped all pretense of fully generic (e.g. pretend + * we don't know whether '1' is the higher voltage) interface, as + * the restrictions of the generic i2c interface (e.g. no access from + * driver itself) make it unsuitable for this use. + */ + +#define READ_CMD 1 +#define WRITE_CMD 0 + +/** + * i2c_wait_for_writes - wait for a write + * @dd: the qlogic_ib device + * + * We use this instead of udelay directly, so we can make sure + * that previous register writes have been flushed all the way + * to the chip. Since we are delaying anyway, the cost doesn't + * hurt, and makes the bit twiddling more regular + */ +static void i2c_wait_for_writes(struct qib_devdata *dd) +{ + /* + * implicit read of EXTStatus is as good as explicit + * read of scratch, if all we want to do is flush + * writes. + */ + dd->f_gpio_mod(dd, 0, 0, 0); + rmb(); /* inlined, so prevent compiler reordering */ +} + +/* + * QSFP modules are allowed to hold SCL low for 500uSec. Allow twice that + * for "almost compliant" modules + */ +#define SCL_WAIT_USEC 1000 + +/* BUF_WAIT is time bus must be free between STOP or ACK and to next START. + * Should be 20, but some chips need more. + */ +#define TWSI_BUF_WAIT_USEC 60 + +static void scl_out(struct qib_devdata *dd, u8 bit) +{ + u32 mask; + + udelay(1); + + mask = 1UL << dd->gpio_scl_num; + + /* SCL is meant to be bare-drain, so never set "OUT", just DIR */ + dd->f_gpio_mod(dd, 0, bit ? 0 : mask, mask); + + /* + * Allow for slow slaves by simple + * delay for falling edge, sampling on rise. + */ + if (!bit) + udelay(2); + else { + int rise_usec; + for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) { + if (mask & dd->f_gpio_mod(dd, 0, 0, 0)) + break; + udelay(2); + } + if (rise_usec <= 0) + qib_dev_err(dd, "SCL interface stuck low > %d uSec\n", + SCL_WAIT_USEC); + } + i2c_wait_for_writes(dd); +} + +static void sda_out(struct qib_devdata *dd, u8 bit) +{ + u32 mask; + + mask = 1UL << dd->gpio_sda_num; + + /* SDA is meant to be bare-drain, so never set "OUT", just DIR */ + dd->f_gpio_mod(dd, 0, bit ? 0 : mask, mask); + + i2c_wait_for_writes(dd); + udelay(2); +} + +static u8 sda_in(struct qib_devdata *dd, int wait) +{ + int bnum; + u32 read_val, mask; + + bnum = dd->gpio_sda_num; + mask = (1UL << bnum); + /* SDA is meant to be bare-drain, so never set "OUT", just DIR */ + dd->f_gpio_mod(dd, 0, 0, mask); + read_val = dd->f_gpio_mod(dd, 0, 0, 0); + if (wait) + i2c_wait_for_writes(dd); + return (read_val & mask) >> bnum; +} + +/** + * i2c_ackrcv - see if ack following write is true + * @dd: the qlogic_ib device + */ +static int i2c_ackrcv(struct qib_devdata *dd) +{ + u8 ack_received; + + /* AT ENTRY SCL = LOW */ + /* change direction, ignore data */ + ack_received = sda_in(dd, 1); + scl_out(dd, 1); + ack_received = sda_in(dd, 1) == 0; + scl_out(dd, 0); + return ack_received; +} + +static void stop_cmd(struct qib_devdata *dd); + +/** + * rd_byte - read a byte, sending STOP on last, else ACK + * @dd: the qlogic_ib device + * + * Returns byte shifted out of device + */ +static int rd_byte(struct qib_devdata *dd, int last) +{ + int bit_cntr, data; + + data = 0; + + for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) { + data <<= 1; + scl_out(dd, 1); + data |= sda_in(dd, 0); + scl_out(dd, 0); + } + if (last) { + scl_out(dd, 1); + stop_cmd(dd); + } else { + sda_out(dd, 0); + scl_out(dd, 1); + scl_out(dd, 0); + sda_out(dd, 1); + } + return data; +} + +/** + * wr_byte - write a byte, one bit at a time + * @dd: the qlogic_ib device + * @data: the byte to write + * + * Returns 0 if we got the following ack, otherwise 1 + */ +static int wr_byte(struct qib_devdata *dd, u8 data) +{ + int bit_cntr; + u8 bit; + + for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) { + bit = (data >> bit_cntr) & 1; + sda_out(dd, bit); + scl_out(dd, 1); + scl_out(dd, 0); + } + return (!i2c_ackrcv(dd)) ? 1 : 0; +} + +/* + * issue TWSI start sequence: + * (both clock/data high, clock high, data low while clock is high) + */ +static void start_seq(struct qib_devdata *dd) +{ + sda_out(dd, 1); + scl_out(dd, 1); + sda_out(dd, 0); + udelay(1); + scl_out(dd, 0); +} + +/** + * stop_seq - transmit the stop sequence + * @dd: the qlogic_ib device + * + * (both clock/data low, clock high, data high while clock is high) + */ +static void stop_seq(struct qib_devdata *dd) +{ + scl_out(dd, 0); + sda_out(dd, 0); + scl_out(dd, 1); + sda_out(dd, 1); +} + +/** + * stop_cmd - transmit the stop condition + * @dd: the qlogic_ib device + * + * (both clock/data low, clock high, data high while clock is high) + */ +static void stop_cmd(struct qib_devdata *dd) +{ + stop_seq(dd); + udelay(TWSI_BUF_WAIT_USEC); +} + +/** + * qib_twsi_reset - reset I2C communication + * @dd: the qlogic_ib device + */ + +int qib_twsi_reset(struct qib_devdata *dd) +{ + int clock_cycles_left = 9; + int was_high = 0; + u32 pins, mask; + + /* Both SCL and SDA should be high. If not, there + * is something wrong. + */ + mask = (1UL << dd->gpio_scl_num) | (1UL << dd->gpio_sda_num); + + /* + * Force pins to desired innocuous state. + * This is the default power-on state with out=0 and dir=0, + * So tri-stated and should be floating high (barring HW problems) + */ + dd->f_gpio_mod(dd, 0, 0, mask); + + /* + * Clock nine times to get all listeners into a sane state. + * If SDA does not go high at any point, we are wedged. + * One vendor recommends then issuing START followed by STOP. + * we cannot use our "normal" functions to do that, because + * if SCL drops between them, another vendor's part will + * wedge, dropping SDA and keeping it low forever, at the end of + * the next transaction (even if it was not the device addressed). + * So our START and STOP take place with SCL held high. + */ + while (clock_cycles_left--) { + scl_out(dd, 0); + scl_out(dd, 1); + /* Note if SDA is high, but keep clocking to sync slave */ + was_high |= sda_in(dd, 0); + } + + if (was_high) { + /* + * We saw a high, which we hope means the slave is sync'd. + * Issue START, STOP, pause for T_BUF. + */ + + pins = dd->f_gpio_mod(dd, 0, 0, 0); + if ((pins & mask) != mask) + qib_dev_err(dd, "GPIO pins not at rest: %d\n", + pins & mask); + /* Drop SDA to issue START */ + udelay(1); /* Guarantee .6 uSec setup */ + sda_out(dd, 0); + udelay(1); /* Guarantee .6 uSec hold */ + /* At this point, SCL is high, SDA low. Raise SDA for STOP */ + sda_out(dd, 1); + udelay(TWSI_BUF_WAIT_USEC); + } + + return !was_high; +} + +#define QIB_TWSI_START 0x100 +#define QIB_TWSI_STOP 0x200 + +/* Write byte to TWSI, optionally prefixed with START or suffixed with + * STOP. + * returns 0 if OK (ACK received), else != 0 + */ +static int qib_twsi_wr(struct qib_devdata *dd, int data, int flags) +{ + int ret = 1; + if (flags & QIB_TWSI_START) + start_seq(dd); + + ret = wr_byte(dd, data); /* Leaves SCL low (from i2c_ackrcv()) */ + + if (flags & QIB_TWSI_STOP) + stop_cmd(dd); + return ret; +} + +/* Added functionality for IBA7220-based cards */ +#define QIB_TEMP_DEV 0x98 + +/* + * qib_twsi_blk_rd + * Formerly called qib_eeprom_internal_read, and only used for eeprom, + * but now the general interface for data transfer from twsi devices. + * One vestige of its former role is that it recognizes a device + * QIB_TWSI_NO_DEV and does the correct operation for the legacy part, + * which responded to all TWSI device codes, interpreting them as + * address within device. On all other devices found on board handled by + * this driver, the device is followed by a one-byte "address" which selects + * the "register" or "offset" within the device from which data should + * be read. + */ +int qib_twsi_blk_rd(struct qib_devdata *dd, int dev, int addr, + void *buffer, int len) +{ + int ret; + u8 *bp = buffer; + + ret = 1; + + if (dev == QIB_TWSI_NO_DEV) { + /* legacy not-really-I2C */ + addr = (addr << 1) | READ_CMD; + ret = qib_twsi_wr(dd, addr, QIB_TWSI_START); + } else { + /* Actual I2C */ + ret = qib_twsi_wr(dd, dev | WRITE_CMD, QIB_TWSI_START); + if (ret) { + stop_cmd(dd); + ret = 1; + goto bail; + } + /* + * SFF spec claims we do _not_ stop after the addr + * but simply issue a start with the "read" dev-addr. + * Since we are implicitely waiting for ACK here, + * we need t_buf (nominally 20uSec) before that start, + * and cannot rely on the delay built in to the STOP + */ + ret = qib_twsi_wr(dd, addr, 0); + udelay(TWSI_BUF_WAIT_USEC); + + if (ret) { + qib_dev_err(dd, + "Failed to write interface read addr %02X\n", + addr); + ret = 1; + goto bail; + } + ret = qib_twsi_wr(dd, dev | READ_CMD, QIB_TWSI_START); + } + if (ret) { + stop_cmd(dd); + ret = 1; + goto bail; + } + + /* + * block devices keeps clocking data out as long as we ack, + * automatically incrementing the address. Some have "pages" + * whose boundaries will not be crossed, but the handling + * of these is left to the caller, who is in a better + * position to know. + */ + while (len-- > 0) { + /* + * Get and store data, sending ACK if length remaining, + * else STOP + */ + *bp++ = rd_byte(dd, !len); + } + + ret = 0; + +bail: + return ret; +} + +/* + * qib_twsi_blk_wr + * Formerly called qib_eeprom_internal_write, and only used for eeprom, + * but now the general interface for data transfer to twsi devices. + * One vestige of its former role is that it recognizes a device + * QIB_TWSI_NO_DEV and does the correct operation for the legacy part, + * which responded to all TWSI device codes, interpreting them as + * address within device. On all other devices found on board handled by + * this driver, the device is followed by a one-byte "address" which selects + * the "register" or "offset" within the device to which data should + * be written. + */ +int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr, + const void *buffer, int len) +{ + int sub_len; + const u8 *bp = buffer; + int max_wait_time, i; + int ret; + ret = 1; + + while (len > 0) { + if (dev == QIB_TWSI_NO_DEV) { + if (qib_twsi_wr(dd, (addr << 1) | WRITE_CMD, + QIB_TWSI_START)) { + goto failed_write; + } + } else { + /* Real I2C */ + if (qib_twsi_wr(dd, dev | WRITE_CMD, QIB_TWSI_START)) + goto failed_write; + ret = qib_twsi_wr(dd, addr, 0); + if (ret) { + qib_dev_err(dd, "Failed to write interface" + " write addr %02X\n", addr); + goto failed_write; + } + } + + sub_len = min(len, 4); + addr += sub_len; + len -= sub_len; + + for (i = 0; i < sub_len; i++) + if (qib_twsi_wr(dd, *bp++, 0)) + goto failed_write; + + stop_cmd(dd); + + /* + * Wait for write complete by waiting for a successful + * read (the chip replies with a zero after the write + * cmd completes, and before it writes to the eeprom. + * The startcmd for the read will fail the ack until + * the writes have completed. We do this inline to avoid + * the debug prints that are in the real read routine + * if the startcmd fails. + * We also use the proper device address, so it doesn't matter + * whether we have real eeprom_dev. Legacy likes any address. + */ + max_wait_time = 100; + while (qib_twsi_wr(dd, dev | READ_CMD, QIB_TWSI_START)) { + stop_cmd(dd); + if (!--max_wait_time) + goto failed_write; + } + /* now read (and ignore) the resulting byte */ + rd_byte(dd, 1); + } + + ret = 0; + goto bail; + +failed_write: + stop_cmd(dd); + ret = 1; + +bail: + return ret; +} diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c new file mode 100644 index 00000000..1bf626c4 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_tx.c @@ -0,0 +1,562 @@ +/* + * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "qib.h" + +static unsigned qib_hol_timeout_ms = 3000; +module_param_named(hol_timeout_ms, qib_hol_timeout_ms, uint, S_IRUGO); +MODULE_PARM_DESC(hol_timeout_ms, + "duration of user app suspension after link failure"); + +unsigned qib_sdma_fetch_arb = 1; +module_param_named(fetch_arb, qib_sdma_fetch_arb, uint, S_IRUGO); +MODULE_PARM_DESC(fetch_arb, "IBA7220: change SDMA descriptor arbitration"); + +/** + * qib_disarm_piobufs - cancel a range of PIO buffers + * @dd: the qlogic_ib device + * @first: the first PIO buffer to cancel + * @cnt: the number of PIO buffers to cancel + * + * Cancel a range of PIO buffers. Used at user process close, + * in case it died while writing to a PIO buffer. + */ +void qib_disarm_piobufs(struct qib_devdata *dd, unsigned first, unsigned cnt) +{ + unsigned long flags; + unsigned i; + unsigned last; + + last = first + cnt; + spin_lock_irqsave(&dd->pioavail_lock, flags); + for (i = first; i < last; i++) { + __clear_bit(i, dd->pio_need_disarm); + dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(i)); + } + spin_unlock_irqrestore(&dd->pioavail_lock, flags); +} + +/* + * This is called by a user process when it sees the DISARM_BUFS event + * bit is set. + */ +int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *rcd) +{ + struct qib_devdata *dd = rcd->dd; + unsigned i; + unsigned last; + unsigned n = 0; + + last = rcd->pio_base + rcd->piocnt; + /* + * Don't need uctxt_lock here, since user has called in to us. + * Clear at start in case more interrupts set bits while we + * are disarming + */ + if (rcd->user_event_mask) { + /* + * subctxt_cnt is 0 if not shared, so do base + * separately, first, then remaining subctxt, if any + */ + clear_bit(_QIB_EVENT_DISARM_BUFS_BIT, &rcd->user_event_mask[0]); + for (i = 1; i < rcd->subctxt_cnt; i++) + clear_bit(_QIB_EVENT_DISARM_BUFS_BIT, + &rcd->user_event_mask[i]); + } + spin_lock_irq(&dd->pioavail_lock); + for (i = rcd->pio_base; i < last; i++) { + if (__test_and_clear_bit(i, dd->pio_need_disarm)) { + n++; + dd->f_sendctrl(rcd->ppd, QIB_SENDCTRL_DISARM_BUF(i)); + } + } + spin_unlock_irq(&dd->pioavail_lock); + return 0; +} + +static struct qib_pportdata *is_sdma_buf(struct qib_devdata *dd, unsigned i) +{ + struct qib_pportdata *ppd; + unsigned pidx; + + for (pidx = 0; pidx < dd->num_pports; pidx++) { + ppd = dd->pport + pidx; + if (i >= ppd->sdma_state.first_sendbuf && + i < ppd->sdma_state.last_sendbuf) + return ppd; + } + return NULL; +} + +/* + * Return true if send buffer is being used by a user context. + * Sets _QIB_EVENT_DISARM_BUFS_BIT in user_event_mask as a side effect + */ +static int find_ctxt(struct qib_devdata *dd, unsigned bufn) +{ + struct qib_ctxtdata *rcd; + unsigned ctxt; + int ret = 0; + + spin_lock(&dd->uctxt_lock); + for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; ctxt++) { + rcd = dd->rcd[ctxt]; + if (!rcd || bufn < rcd->pio_base || + bufn >= rcd->pio_base + rcd->piocnt) + continue; + if (rcd->user_event_mask) { + int i; + /* + * subctxt_cnt is 0 if not shared, so do base + * separately, first, then remaining subctxt, if any + */ + set_bit(_QIB_EVENT_DISARM_BUFS_BIT, + &rcd->user_event_mask[0]); + for (i = 1; i < rcd->subctxt_cnt; i++) + set_bit(_QIB_EVENT_DISARM_BUFS_BIT, + &rcd->user_event_mask[i]); + } + ret = 1; + break; + } + spin_unlock(&dd->uctxt_lock); + + return ret; +} + +/* + * Disarm a set of send buffers. If the buffer might be actively being + * written to, mark the buffer to be disarmed later when it is not being + * written to. + * + * This should only be called from the IRQ error handler. + */ +void qib_disarm_piobufs_set(struct qib_devdata *dd, unsigned long *mask, + unsigned cnt) +{ + struct qib_pportdata *ppd, *pppd[QIB_MAX_IB_PORTS]; + unsigned i; + unsigned long flags; + + for (i = 0; i < dd->num_pports; i++) + pppd[i] = NULL; + + for (i = 0; i < cnt; i++) { + int which; + if (!test_bit(i, mask)) + continue; + /* + * If the buffer is owned by the DMA hardware, + * reset the DMA engine. + */ + ppd = is_sdma_buf(dd, i); + if (ppd) { + pppd[ppd->port] = ppd; + continue; + } + /* + * If the kernel is writing the buffer or the buffer is + * owned by a user process, we can't clear it yet. + */ + spin_lock_irqsave(&dd->pioavail_lock, flags); + if (test_bit(i, dd->pio_writing) || + (!test_bit(i << 1, dd->pioavailkernel) && + find_ctxt(dd, i))) { + __set_bit(i, dd->pio_need_disarm); + which = 0; + } else { + which = 1; + dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(i)); + } + spin_unlock_irqrestore(&dd->pioavail_lock, flags); + } + + /* do cancel_sends once per port that had sdma piobufs in error */ + for (i = 0; i < dd->num_pports; i++) + if (pppd[i]) + qib_cancel_sends(pppd[i]); +} + +/** + * update_send_bufs - update shadow copy of the PIO availability map + * @dd: the qlogic_ib device + * + * called whenever our local copy indicates we have run out of send buffers + */ +static void update_send_bufs(struct qib_devdata *dd) +{ + unsigned long flags; + unsigned i; + const unsigned piobregs = dd->pioavregs; + + /* + * If the generation (check) bits have changed, then we update the + * busy bit for the corresponding PIO buffer. This algorithm will + * modify positions to the value they already have in some cases + * (i.e., no change), but it's faster than changing only the bits + * that have changed. + * + * We would like to do this atomicly, to avoid spinlocks in the + * critical send path, but that's not really possible, given the + * type of changes, and that this routine could be called on + * multiple cpu's simultaneously, so we lock in this routine only, + * to avoid conflicting updates; all we change is the shadow, and + * it's a single 64 bit memory location, so by definition the update + * is atomic in terms of what other cpu's can see in testing the + * bits. The spin_lock overhead isn't too bad, since it only + * happens when all buffers are in use, so only cpu overhead, not + * latency or bandwidth is affected. + */ + if (!dd->pioavailregs_dma) + return; + spin_lock_irqsave(&dd->pioavail_lock, flags); + for (i = 0; i < piobregs; i++) { + u64 pchbusy, pchg, piov, pnew; + + piov = le64_to_cpu(dd->pioavailregs_dma[i]); + pchg = dd->pioavailkernel[i] & + ~(dd->pioavailshadow[i] ^ piov); + pchbusy = pchg << QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT; + if (pchg && (pchbusy & dd->pioavailshadow[i])) { + pnew = dd->pioavailshadow[i] & ~pchbusy; + pnew |= piov & pchbusy; + dd->pioavailshadow[i] = pnew; + } + } + spin_unlock_irqrestore(&dd->pioavail_lock, flags); +} + +/* + * Debugging code and stats updates if no pio buffers available. + */ +static noinline void no_send_bufs(struct qib_devdata *dd) +{ + dd->upd_pio_shadow = 1; + + /* not atomic, but if we lose a stat count in a while, that's OK */ + qib_stats.sps_nopiobufs++; +} + +/* + * Common code for normal driver send buffer allocation, and reserved + * allocation. + * + * Do appropriate marking as busy, etc. + * Returns buffer pointer if one is found, otherwise NULL. + */ +u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum, + u32 first, u32 last) +{ + unsigned i, j, updated = 0; + unsigned nbufs; + unsigned long flags; + unsigned long *shadow = dd->pioavailshadow; + u32 __iomem *buf; + + if (!(dd->flags & QIB_PRESENT)) + return NULL; + + nbufs = last - first + 1; /* number in range to check */ + if (dd->upd_pio_shadow) { + /* + * Minor optimization. If we had no buffers on last call, + * start out by doing the update; continue and do scan even + * if no buffers were updated, to be paranoid. + */ + update_send_bufs(dd); + updated++; + } + i = first; +rescan: + /* + * While test_and_set_bit() is atomic, we do that and then the + * change_bit(), and the pair is not. See if this is the cause + * of the remaining armlaunch errors. + */ + spin_lock_irqsave(&dd->pioavail_lock, flags); + for (j = 0; j < nbufs; j++, i++) { + if (i > last) + i = first; + if (__test_and_set_bit((2 * i) + 1, shadow)) + continue; + /* flip generation bit */ + __change_bit(2 * i, shadow); + /* remember that the buffer can be written to now */ + __set_bit(i, dd->pio_writing); + break; + } + spin_unlock_irqrestore(&dd->pioavail_lock, flags); + + if (j == nbufs) { + if (!updated) { + /* + * First time through; shadow exhausted, but may be + * buffers available, try an update and then rescan. + */ + update_send_bufs(dd); + updated++; + i = first; + goto rescan; + } + no_send_bufs(dd); + buf = NULL; + } else { + if (i < dd->piobcnt2k) + buf = (u32 __iomem *)(dd->pio2kbase + + i * dd->palign); + else if (i < dd->piobcnt2k + dd->piobcnt4k || !dd->piovl15base) + buf = (u32 __iomem *)(dd->pio4kbase + + (i - dd->piobcnt2k) * dd->align4k); + else + buf = (u32 __iomem *)(dd->piovl15base + + (i - (dd->piobcnt2k + dd->piobcnt4k)) * + dd->align4k); + if (pbufnum) + *pbufnum = i; + dd->upd_pio_shadow = 0; + } + + return buf; +} + +/* + * Record that the caller is finished writing to the buffer so we don't + * disarm it while it is being written and disarm it now if needed. + */ +void qib_sendbuf_done(struct qib_devdata *dd, unsigned n) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->pioavail_lock, flags); + __clear_bit(n, dd->pio_writing); + if (__test_and_clear_bit(n, dd->pio_need_disarm)) + dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(n)); + spin_unlock_irqrestore(&dd->pioavail_lock, flags); +} + +/** + * qib_chg_pioavailkernel - change which send buffers are available for kernel + * @dd: the qlogic_ib device + * @start: the starting send buffer number + * @len: the number of send buffers + * @avail: true if the buffers are available for kernel use, false otherwise + */ +void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start, + unsigned len, u32 avail, struct qib_ctxtdata *rcd) +{ + unsigned long flags; + unsigned end; + unsigned ostart = start; + + /* There are two bits per send buffer (busy and generation) */ + start *= 2; + end = start + len * 2; + + spin_lock_irqsave(&dd->pioavail_lock, flags); + /* Set or clear the busy bit in the shadow. */ + while (start < end) { + if (avail) { + unsigned long dma; + int i; + + /* + * The BUSY bit will never be set, because we disarm + * the user buffers before we hand them back to the + * kernel. We do have to make sure the generation + * bit is set correctly in shadow, since it could + * have changed many times while allocated to user. + * We can't use the bitmap functions on the full + * dma array because it is always little-endian, so + * we have to flip to host-order first. + * BITS_PER_LONG is slightly wrong, since it's + * always 64 bits per register in chip... + * We only work on 64 bit kernels, so that's OK. + */ + i = start / BITS_PER_LONG; + __clear_bit(QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT + start, + dd->pioavailshadow); + dma = (unsigned long) + le64_to_cpu(dd->pioavailregs_dma[i]); + if (test_bit((QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT + + start) % BITS_PER_LONG, &dma)) + __set_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT + + start, dd->pioavailshadow); + else + __clear_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT + + start, dd->pioavailshadow); + __set_bit(start, dd->pioavailkernel); + } else { + __set_bit(start + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT, + dd->pioavailshadow); + __clear_bit(start, dd->pioavailkernel); + } + start += 2; + } + + spin_unlock_irqrestore(&dd->pioavail_lock, flags); + + dd->f_txchk_change(dd, ostart, len, avail, rcd); +} + +/* + * Flush all sends that might be in the ready to send state, as well as any + * that are in the process of being sent. Used whenever we need to be + * sure the send side is idle. Cleans up all buffer state by canceling + * all pio buffers, and issuing an abort, which cleans up anything in the + * launch fifo. The cancel is superfluous on some chip versions, but + * it's safer to always do it. + * PIOAvail bits are updated by the chip as if a normal send had happened. + */ +void qib_cancel_sends(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + struct qib_ctxtdata *rcd; + unsigned long flags; + unsigned ctxt; + unsigned i; + unsigned last; + + /* + * Tell PSM to disarm buffers again before trying to reuse them. + * We need to be sure the rcd doesn't change out from under us + * while we do so. We hold the two locks sequentially. We might + * needlessly set some need_disarm bits as a result, if the + * context is closed after we release the uctxt_lock, but that's + * fairly benign, and safer than nesting the locks. + */ + for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; ctxt++) { + spin_lock_irqsave(&dd->uctxt_lock, flags); + rcd = dd->rcd[ctxt]; + if (rcd && rcd->ppd == ppd) { + last = rcd->pio_base + rcd->piocnt; + if (rcd->user_event_mask) { + /* + * subctxt_cnt is 0 if not shared, so do base + * separately, first, then remaining subctxt, + * if any + */ + set_bit(_QIB_EVENT_DISARM_BUFS_BIT, + &rcd->user_event_mask[0]); + for (i = 1; i < rcd->subctxt_cnt; i++) + set_bit(_QIB_EVENT_DISARM_BUFS_BIT, + &rcd->user_event_mask[i]); + } + i = rcd->pio_base; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + spin_lock_irqsave(&dd->pioavail_lock, flags); + for (; i < last; i++) + __set_bit(i, dd->pio_need_disarm); + spin_unlock_irqrestore(&dd->pioavail_lock, flags); + } else + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + } + + if (!(dd->flags & QIB_HAS_SEND_DMA)) + dd->f_sendctrl(ppd, QIB_SENDCTRL_DISARM_ALL | + QIB_SENDCTRL_FLUSH); +} + +/* + * Force an update of in-memory copy of the pioavail registers, when + * needed for any of a variety of reasons. + * If already off, this routine is a nop, on the assumption that the + * caller (or set of callers) will "do the right thing". + * This is a per-device operation, so just the first port. + */ +void qib_force_pio_avail_update(struct qib_devdata *dd) +{ + dd->f_sendctrl(dd->pport, QIB_SENDCTRL_AVAIL_BLIP); +} + +void qib_hol_down(struct qib_pportdata *ppd) +{ + /* + * Cancel sends when the link goes DOWN so that we aren't doing it + * at INIT when we might be trying to send SMI packets. + */ + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) + qib_cancel_sends(ppd); +} + +/* + * Link is at INIT. + * We start the HoL timer so we can detect stuck packets blocking SMP replies. + * Timer may already be running, so use mod_timer, not add_timer. + */ +void qib_hol_init(struct qib_pportdata *ppd) +{ + if (ppd->hol_state != QIB_HOL_INIT) { + ppd->hol_state = QIB_HOL_INIT; + mod_timer(&ppd->hol_timer, + jiffies + msecs_to_jiffies(qib_hol_timeout_ms)); + } +} + +/* + * Link is up, continue any user processes, and ensure timer + * is a nop, if running. Let timer keep running, if set; it + * will nop when it sees the link is up. + */ +void qib_hol_up(struct qib_pportdata *ppd) +{ + ppd->hol_state = QIB_HOL_UP; +} + +/* + * This is only called via the timer. + */ +void qib_hol_event(unsigned long opaque) +{ + struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + + /* If hardware error, etc, skip. */ + if (!(ppd->dd->flags & QIB_INITTED)) + return; + + if (ppd->hol_state != QIB_HOL_UP) { + /* + * Try to flush sends in case a stuck packet is blocking + * SMP replies. + */ + qib_hol_down(ppd); + mod_timer(&ppd->hol_timer, + jiffies + msecs_to_jiffies(qib_hol_timeout_ms)); + } +} diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c new file mode 100644 index 00000000..7ce2ac2e --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -0,0 +1,553 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "qib.h" + +/* cut down ridiculously long IB macro names */ +#define OP(x) IB_OPCODE_UC_##x + +/** + * qib_make_uc_req - construct a request packet (SEND, RDMA write) + * @qp: a pointer to the QP + * + * Return 1 if constructed; otherwise, return 0. + */ +int qib_make_uc_req(struct qib_qp *qp) +{ + struct qib_other_headers *ohdr; + struct qib_swqe *wqe; + unsigned long flags; + u32 hwords; + u32 bth0; + u32 len; + u32 pmtu = qp->pmtu; + int ret = 0; + + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) { + if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= QIB_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } + + ohdr = &qp->s_hdr.u.oth; + if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + ohdr = &qp->s_hdr.u.l.oth; + + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ + hwords = 5; + bth0 = 0; + + /* Get the next send request. */ + wqe = get_swqe_ptr(qp, qp->s_cur); + qp->s_wqe = NULL; + switch (qp->s_state) { + default: + if (!(ib_qib_state_ops[qp->state] & + QIB_PROCESS_NEXT_SEND_OK)) + goto bail; + /* Check if send work queue is empty. */ + if (qp->s_cur == qp->s_head) + goto bail; + /* + * Start a new request. + */ + wqe->psn = qp->s_next_psn; + qp->s_psn = qp->s_next_psn; + qp->s_sge.sge = wqe->sg_list[0]; + qp->s_sge.sg_list = wqe->sg_list + 1; + qp->s_sge.num_sge = wqe->wr.num_sge; + qp->s_sge.total_len = wqe->length; + len = wqe->length; + qp->s_len = len; + switch (wqe->wr.opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + if (len > pmtu) { + qp->s_state = OP(SEND_FIRST); + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_SEND) + qp->s_state = OP(SEND_ONLY); + else { + qp->s_state = + OP(SEND_ONLY_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + } + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + ohdr->u.rc.reth.vaddr = + cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + ohdr->u.rc.reth.rkey = + cpu_to_be32(wqe->wr.wr.rdma.rkey); + ohdr->u.rc.reth.length = cpu_to_be32(len); + hwords += sizeof(struct ib_reth) / 4; + if (len > pmtu) { + qp->s_state = OP(RDMA_WRITE_FIRST); + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + qp->s_state = OP(RDMA_WRITE_ONLY); + else { + qp->s_state = + OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); + /* Immediate data comes after the RETH */ + ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + } + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + + default: + goto bail; + } + break; + + case OP(SEND_FIRST): + qp->s_state = OP(SEND_MIDDLE); + /* FALLTHROUGH */ + case OP(SEND_MIDDLE): + len = qp->s_len; + if (len > pmtu) { + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_SEND) + qp->s_state = OP(SEND_LAST); + else { + qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + } + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + + case OP(RDMA_WRITE_FIRST): + qp->s_state = OP(RDMA_WRITE_MIDDLE); + /* FALLTHROUGH */ + case OP(RDMA_WRITE_MIDDLE): + len = qp->s_len; + if (len > pmtu) { + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + qp->s_state = OP(RDMA_WRITE_LAST); + else { + qp->s_state = + OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + } + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + } + qp->s_len -= len; + qp->s_hdrwords = hwords; + qp->s_cur_sge = &qp->s_sge; + qp->s_cur_size = len; + qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), + qp->s_next_psn++ & QIB_PSN_MASK); +done: + ret = 1; + goto unlock; + +bail: + qp->s_flags &= ~QIB_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); + return ret; +} + +/** + * qib_uc_rcv - handle an incoming UC packet + * @ibp: the port the packet came in on + * @hdr: the header of the packet + * @has_grh: true if the packet has a GRH + * @data: the packet data + * @tlen: the length of the packet + * @qp: the QP for this packet. + * + * This is called from qib_qp_rcv() to process an incoming UC packet + * for the given QP. + * Called at interrupt level. + */ +void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct qib_qp *qp) +{ + struct qib_other_headers *ohdr; + u32 opcode; + u32 hdrsize; + u32 psn; + u32 pad; + struct ib_wc wc; + u32 pmtu = qp->pmtu; + struct ib_reth *reth; + int ret; + + /* Check for GRH */ + if (!has_grh) { + ohdr = &hdr->u.oth; + hdrsize = 8 + 12; /* LRH + BTH */ + } else { + ohdr = &hdr->u.l.oth; + hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ + } + + opcode = be32_to_cpu(ohdr->bth[0]); + if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode)) + return; + + psn = be32_to_cpu(ohdr->bth[2]); + opcode >>= 24; + + /* Compare the PSN verses the expected PSN. */ + if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) { + /* + * Handle a sequence error. + * Silently drop any current message. + */ + qp->r_psn = psn; +inv: + if (qp->r_state == OP(SEND_FIRST) || + qp->r_state == OP(SEND_MIDDLE)) { + set_bit(QIB_R_REWIND_SGE, &qp->r_aflags); + qp->r_sge.num_sge = 0; + } else + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } + qp->r_state = OP(SEND_LAST); + switch (opcode) { + case OP(SEND_FIRST): + case OP(SEND_ONLY): + case OP(SEND_ONLY_WITH_IMMEDIATE): + goto send_first; + + case OP(RDMA_WRITE_FIRST): + case OP(RDMA_WRITE_ONLY): + case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): + goto rdma_first; + + default: + goto drop; + } + } + + /* Check for opcode sequence errors. */ + switch (qp->r_state) { + case OP(SEND_FIRST): + case OP(SEND_MIDDLE): + if (opcode == OP(SEND_MIDDLE) || + opcode == OP(SEND_LAST) || + opcode == OP(SEND_LAST_WITH_IMMEDIATE)) + break; + goto inv; + + case OP(RDMA_WRITE_FIRST): + case OP(RDMA_WRITE_MIDDLE): + if (opcode == OP(RDMA_WRITE_MIDDLE) || + opcode == OP(RDMA_WRITE_LAST) || + opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) + break; + goto inv; + + default: + if (opcode == OP(SEND_FIRST) || + opcode == OP(SEND_ONLY) || + opcode == OP(SEND_ONLY_WITH_IMMEDIATE) || + opcode == OP(RDMA_WRITE_FIRST) || + opcode == OP(RDMA_WRITE_ONLY) || + opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) + break; + goto inv; + } + + if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) { + qp->r_flags |= QIB_R_COMM_EST; + if (qp->ibqp.event_handler) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_COMM_EST; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + } + + /* OK, process the packet. */ + switch (opcode) { + case OP(SEND_FIRST): + case OP(SEND_ONLY): + case OP(SEND_ONLY_WITH_IMMEDIATE): +send_first: + if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) + qp->r_sge = qp->s_rdma_read_sge; + else { + ret = qib_get_rwqe(qp, 0); + if (ret < 0) + goto op_err; + if (!ret) + goto drop; + /* + * qp->s_rdma_read_sge will be the owner + * of the mr references. + */ + qp->s_rdma_read_sge = qp->r_sge; + } + qp->r_rcv_len = 0; + if (opcode == OP(SEND_ONLY)) + goto no_immediate_data; + else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) + goto send_last_imm; + /* FALLTHROUGH */ + case OP(SEND_MIDDLE): + /* Check for invalid length PMTU or posted rwqe len. */ + if (unlikely(tlen != (hdrsize + pmtu + 4))) + goto rewind; + qp->r_rcv_len += pmtu; + if (unlikely(qp->r_rcv_len > qp->r_len)) + goto rewind; + qib_copy_sge(&qp->r_sge, data, pmtu, 0); + break; + + case OP(SEND_LAST_WITH_IMMEDIATE): +send_last_imm: + wc.ex.imm_data = ohdr->u.imm_data; + hdrsize += 4; + wc.wc_flags = IB_WC_WITH_IMM; + goto send_last; + case OP(SEND_LAST): +no_immediate_data: + wc.ex.imm_data = 0; + wc.wc_flags = 0; +send_last: + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* Check for invalid length. */ + /* XXX LAST len should be >= 1 */ + if (unlikely(tlen < (hdrsize + pad + 4))) + goto rewind; + /* Don't count the CRC. */ + tlen -= (hdrsize + pad + 4); + wc.byte_len = tlen + qp->r_rcv_len; + if (unlikely(wc.byte_len > qp->r_len)) + goto rewind; + wc.opcode = IB_WC_RECV; +last_imm: + qib_copy_sge(&qp->r_sge, data, tlen, 0); + while (qp->s_rdma_read_sge.num_sge) { + atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount); + if (--qp->s_rdma_read_sge.num_sge) + qp->s_rdma_read_sge.sge = + *qp->s_rdma_read_sge.sg_list++; + } + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; + wc.qp = &qp->ibqp; + wc.src_qp = qp->remote_qpn; + wc.slid = qp->remote_ah_attr.dlid; + wc.sl = qp->remote_ah_attr.sl; + /* zero fields that are N/A */ + wc.vendor_err = 0; + wc.pkey_index = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; + /* Signal completion event if the solicited bit is set. */ + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + (ohdr->bth[0] & + cpu_to_be32(IB_BTH_SOLICITED)) != 0); + break; + + case OP(RDMA_WRITE_FIRST): + case OP(RDMA_WRITE_ONLY): + case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */ +rdma_first: + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_WRITE))) { + goto drop; + } + reth = &ohdr->u.rc.reth; + hdrsize += sizeof(*reth); + qp->r_len = be32_to_cpu(reth->length); + qp->r_rcv_len = 0; + qp->r_sge.sg_list = NULL; + if (qp->r_len != 0) { + u32 rkey = be32_to_cpu(reth->rkey); + u64 vaddr = be64_to_cpu(reth->vaddr); + int ok; + + /* Check rkey */ + ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, + vaddr, rkey, IB_ACCESS_REMOTE_WRITE); + if (unlikely(!ok)) + goto drop; + qp->r_sge.num_sge = 1; + } else { + qp->r_sge.num_sge = 0; + qp->r_sge.sge.mr = NULL; + qp->r_sge.sge.vaddr = NULL; + qp->r_sge.sge.length = 0; + qp->r_sge.sge.sge_length = 0; + } + if (opcode == OP(RDMA_WRITE_ONLY)) + goto rdma_last; + else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) { + wc.ex.imm_data = ohdr->u.rc.imm_data; + goto rdma_last_imm; + } + /* FALLTHROUGH */ + case OP(RDMA_WRITE_MIDDLE): + /* Check for invalid length PMTU or posted rwqe len. */ + if (unlikely(tlen != (hdrsize + pmtu + 4))) + goto drop; + qp->r_rcv_len += pmtu; + if (unlikely(qp->r_rcv_len > qp->r_len)) + goto drop; + qib_copy_sge(&qp->r_sge, data, pmtu, 1); + break; + + case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): + wc.ex.imm_data = ohdr->u.imm_data; +rdma_last_imm: + hdrsize += 4; + wc.wc_flags = IB_WC_WITH_IMM; + + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* Check for invalid length. */ + /* XXX LAST len should be >= 1 */ + if (unlikely(tlen < (hdrsize + pad + 4))) + goto drop; + /* Don't count the CRC. */ + tlen -= (hdrsize + pad + 4); + if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) + goto drop; + if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) + while (qp->s_rdma_read_sge.num_sge) { + atomic_dec(&qp->s_rdma_read_sge.sge.mr-> + refcount); + if (--qp->s_rdma_read_sge.num_sge) + qp->s_rdma_read_sge.sge = + *qp->s_rdma_read_sge.sg_list++; + } + else { + ret = qib_get_rwqe(qp, 1); + if (ret < 0) + goto op_err; + if (!ret) + goto drop; + } + wc.byte_len = qp->r_len; + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + goto last_imm; + + case OP(RDMA_WRITE_LAST): +rdma_last: + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* Check for invalid length. */ + /* XXX LAST len should be >= 1 */ + if (unlikely(tlen < (hdrsize + pad + 4))) + goto drop; + /* Don't count the CRC. */ + tlen -= (hdrsize + pad + 4); + if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) + goto drop; + qib_copy_sge(&qp->r_sge, data, tlen, 1); + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } + break; + + default: + /* Drop packet for unknown opcodes. */ + goto drop; + } + qp->r_psn++; + qp->r_state = opcode; + return; + +rewind: + set_bit(QIB_R_REWIND_SGE, &qp->r_aflags); + qp->r_sge.num_sge = 0; +drop: + ibp->n_pkt_drops++; + return; + +op_err: + qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); + return; + +} diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c new file mode 100644 index 00000000..828609fa --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -0,0 +1,591 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "qib.h" +#include "qib_mad.h" + +/** + * qib_ud_loopback - handle send on loopback QPs + * @sqp: the sending QP + * @swqe: the send work request + * + * This is called from qib_make_ud_req() to forward a WQE addressed + * to the same HCA. + * Note that the receive interrupt handler may be calling qib_ud_rcv() + * while this is being called. + */ +static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) +{ + struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); + struct qib_pportdata *ppd; + struct qib_qp *qp; + struct ib_ah_attr *ah_attr; + unsigned long flags; + struct qib_sge_state ssge; + struct qib_sge *sge; + struct ib_wc wc; + u32 length; + + qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn); + if (!qp) { + ibp->n_pkt_drops++; + return; + } + if (qp->ibqp.qp_type != sqp->ibqp.qp_type || + !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { + ibp->n_pkt_drops++; + goto drop; + } + + ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; + ppd = ppd_from_ibp(ibp); + + if (qp->ibqp.qp_num > 1) { + u16 pkey1; + u16 pkey2; + u16 lid; + + pkey1 = qib_get_pkey(ibp, sqp->s_pkey_index); + pkey2 = qib_get_pkey(ibp, qp->s_pkey_index); + if (unlikely(!qib_pkey_ok(pkey1, pkey2))) { + lid = ppd->lid | (ah_attr->src_path_bits & + ((1 << ppd->lmc) - 1)); + qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, pkey1, + ah_attr->sl, + sqp->ibqp.qp_num, qp->ibqp.qp_num, + cpu_to_be16(lid), + cpu_to_be16(ah_attr->dlid)); + goto drop; + } + } + + /* + * Check that the qkey matches (except for QP0, see 9.6.1.4.1). + * Qkeys with the high order bit set mean use the + * qkey from the QP context instead of the WR (see 10.2.5). + */ + if (qp->ibqp.qp_num) { + u32 qkey; + + qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ? + sqp->qkey : swqe->wr.wr.ud.remote_qkey; + if (unlikely(qkey != qp->qkey)) { + u16 lid; + + lid = ppd->lid | (ah_attr->src_path_bits & + ((1 << ppd->lmc) - 1)); + qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey, + ah_attr->sl, + sqp->ibqp.qp_num, qp->ibqp.qp_num, + cpu_to_be16(lid), + cpu_to_be16(ah_attr->dlid)); + goto drop; + } + } + + /* + * A GRH is expected to precede the data even if not + * present on the wire. + */ + length = swqe->length; + memset(&wc, 0, sizeof wc); + wc.byte_len = length + sizeof(struct ib_grh); + + if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { + wc.wc_flags = IB_WC_WITH_IMM; + wc.ex.imm_data = swqe->wr.ex.imm_data; + } + + spin_lock_irqsave(&qp->r_lock, flags); + + /* + * Get the next work request entry to find where to put the data. + */ + if (qp->r_flags & QIB_R_REUSE_SGE) + qp->r_flags &= ~QIB_R_REUSE_SGE; + else { + int ret; + + ret = qib_get_rwqe(qp, 0); + if (ret < 0) { + qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); + goto bail_unlock; + } + if (!ret) { + if (qp->ibqp.qp_num == 0) + ibp->n_vl15_dropped++; + goto bail_unlock; + } + } + /* Silently drop packets which are too big. */ + if (unlikely(wc.byte_len > qp->r_len)) { + qp->r_flags |= QIB_R_REUSE_SGE; + ibp->n_pkt_drops++; + goto bail_unlock; + } + + if (ah_attr->ah_flags & IB_AH_GRH) { + qib_copy_sge(&qp->r_sge, &ah_attr->grh, + sizeof(struct ib_grh), 1); + wc.wc_flags |= IB_WC_GRH; + } else + qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); + ssge.sg_list = swqe->sg_list + 1; + ssge.sge = *swqe->sg_list; + ssge.num_sge = swqe->wr.num_sge; + sge = &ssge.sge; + while (length) { + u32 len = sge->length; + + if (len > length) + len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + qib_copy_sge(&qp->r_sge, sge->vaddr, len, 1); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (--ssge.num_sge) + *sge = *ssge.sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { + if (++sge->n >= QIB_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + length -= len; + } + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } + if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + goto bail_unlock; + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; + wc.opcode = IB_WC_RECV; + wc.qp = &qp->ibqp; + wc.src_qp = sqp->ibqp.qp_num; + wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ? + swqe->wr.wr.ud.pkey_index : 0; + wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1)); + wc.sl = ah_attr->sl; + wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1); + wc.port_num = qp->port_num; + /* Signal completion event if the solicited bit is set. */ + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + swqe->wr.send_flags & IB_SEND_SOLICITED); + ibp->n_loop_pkts++; +bail_unlock: + spin_unlock_irqrestore(&qp->r_lock, flags); +drop: + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); +} + +/** + * qib_make_ud_req - construct a UD request packet + * @qp: the QP + * + * Return 1 if constructed; otherwise, return 0. + */ +int qib_make_ud_req(struct qib_qp *qp) +{ + struct qib_other_headers *ohdr; + struct ib_ah_attr *ah_attr; + struct qib_pportdata *ppd; + struct qib_ibport *ibp; + struct qib_swqe *wqe; + unsigned long flags; + u32 nwords; + u32 extra_bytes; + u32 bth0; + u16 lrh0; + u16 lid; + int ret = 0; + int next_cur; + + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK)) { + if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= QIB_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } + + if (qp->s_cur == qp->s_head) + goto bail; + + wqe = get_swqe_ptr(qp, qp->s_cur); + next_cur = qp->s_cur + 1; + if (next_cur >= qp->s_size) + next_cur = 0; + + /* Construct the header. */ + ibp = to_iport(qp->ibqp.device, qp->port_num); + ppd = ppd_from_ibp(ibp); + ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; + if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) { + if (ah_attr->dlid != QIB_PERMISSIVE_LID) + ibp->n_multicast_xmit++; + else + ibp->n_unicast_xmit++; + } else { + ibp->n_unicast_xmit++; + lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); + if (unlikely(lid == ppd->lid)) { + /* + * If DMAs are in progress, we can't generate + * a completion for the loopback packet since + * it would be out of order. + * XXX Instead of waiting, we could queue a + * zero length descriptor so we get a callback. + */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= QIB_S_WAIT_DMA; + goto bail; + } + qp->s_cur = next_cur; + spin_unlock_irqrestore(&qp->s_lock, flags); + qib_ud_loopback(qp, wqe); + spin_lock_irqsave(&qp->s_lock, flags); + qib_send_complete(qp, wqe, IB_WC_SUCCESS); + goto done; + } + } + + qp->s_cur = next_cur; + extra_bytes = -wqe->length & 3; + nwords = (wqe->length + extra_bytes) >> 2; + + /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */ + qp->s_hdrwords = 7; + qp->s_cur_size = wqe->length; + qp->s_cur_sge = &qp->s_sge; + qp->s_srate = ah_attr->static_rate; + qp->s_wqe = wqe; + qp->s_sge.sge = wqe->sg_list[0]; + qp->s_sge.sg_list = wqe->sg_list + 1; + qp->s_sge.num_sge = wqe->wr.num_sge; + qp->s_sge.total_len = wqe->length; + + if (ah_attr->ah_flags & IB_AH_GRH) { + /* Header size in 32-bit words. */ + qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh, + &ah_attr->grh, + qp->s_hdrwords, nwords); + lrh0 = QIB_LRH_GRH; + ohdr = &qp->s_hdr.u.l.oth; + /* + * Don't worry about sending to locally attached multicast + * QPs. It is unspecified by the spec. what happens. + */ + } else { + /* Header size in 32-bit words. */ + lrh0 = QIB_LRH_BTH; + ohdr = &qp->s_hdr.u.oth; + } + if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { + qp->s_hdrwords++; + ohdr->u.ud.imm_data = wqe->wr.ex.imm_data; + bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; + } else + bth0 = IB_OPCODE_UD_SEND_ONLY << 24; + lrh0 |= ah_attr->sl << 4; + if (qp->ibqp.qp_type == IB_QPT_SMI) + lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ + else + lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12; + qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); + qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ + qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + lid = ppd->lid; + if (lid) { + lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1); + qp->s_hdr.lrh[3] = cpu_to_be16(lid); + } else + qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE; + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + bth0 |= extra_bytes << 20; + bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY : + qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ? + wqe->wr.wr.ud.pkey_index : qp->s_pkey_index); + ohdr->bth[0] = cpu_to_be32(bth0); + /* + * Use the multicast QP if the destination LID is a multicast LID. + */ + ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE && + ah_attr->dlid != QIB_PERMISSIVE_LID ? + cpu_to_be32(QIB_MULTICAST_QPN) : + cpu_to_be32(wqe->wr.wr.ud.remote_qpn); + ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK); + /* + * Qkeys with the high order bit set mean use the + * qkey from the QP context instead of the WR (see 10.2.5). + */ + ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ? + qp->qkey : wqe->wr.wr.ud.remote_qkey); + ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); + +done: + ret = 1; + goto unlock; + +bail: + qp->s_flags &= ~QIB_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); + return ret; +} + +static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey) +{ + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_devdata *dd = ppd->dd; + unsigned ctxt = ppd->hw_pidx; + unsigned i; + + pkey &= 0x7fff; /* remove limited/full membership bit */ + + for (i = 0; i < ARRAY_SIZE(dd->rcd[ctxt]->pkeys); ++i) + if ((dd->rcd[ctxt]->pkeys[i] & 0x7fff) == pkey) + return i; + + /* + * Should not get here, this means hardware failed to validate pkeys. + * Punt and return index 0. + */ + return 0; +} + +/** + * qib_ud_rcv - receive an incoming UD packet + * @ibp: the port the packet came in on + * @hdr: the packet header + * @has_grh: true if the packet has a GRH + * @data: the packet data + * @tlen: the packet length + * @qp: the QP the packet came on + * + * This is called from qib_qp_rcv() to process an incoming UD packet + * for the given QP. + * Called at interrupt level. + */ +void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct qib_qp *qp) +{ + struct qib_other_headers *ohdr; + int opcode; + u32 hdrsize; + u32 pad; + struct ib_wc wc; + u32 qkey; + u32 src_qp; + u16 dlid; + + /* Check for GRH */ + if (!has_grh) { + ohdr = &hdr->u.oth; + hdrsize = 8 + 12 + 8; /* LRH + BTH + DETH */ + } else { + ohdr = &hdr->u.l.oth; + hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */ + } + qkey = be32_to_cpu(ohdr->u.ud.deth[0]); + src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & QIB_QPN_MASK; + + /* + * Get the number of bytes the message was padded by + * and drop incomplete packets. + */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + if (unlikely(tlen < (hdrsize + pad + 4))) + goto drop; + + tlen -= hdrsize + pad + 4; + + /* + * Check that the permissive LID is only used on QP0 + * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1). + */ + if (qp->ibqp.qp_num) { + if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE || + hdr->lrh[3] == IB_LID_PERMISSIVE)) + goto drop; + if (qp->ibqp.qp_num > 1) { + u16 pkey1, pkey2; + + pkey1 = be32_to_cpu(ohdr->bth[0]); + pkey2 = qib_get_pkey(ibp, qp->s_pkey_index); + if (unlikely(!qib_pkey_ok(pkey1, pkey2))) { + qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, + pkey1, + (be16_to_cpu(hdr->lrh[0]) >> 4) & + 0xF, + src_qp, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); + return; + } + } + if (unlikely(qkey != qp->qkey)) { + qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey, + (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + src_qp, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); + return; + } + /* Drop invalid MAD packets (see 13.5.3.1). */ + if (unlikely(qp->ibqp.qp_num == 1 && + (tlen != 256 || + (be16_to_cpu(hdr->lrh[0]) >> 12) == 15))) + goto drop; + } else { + struct ib_smp *smp; + + /* Drop invalid MAD packets (see 13.5.3.1). */ + if (tlen != 256 || (be16_to_cpu(hdr->lrh[0]) >> 12) != 15) + goto drop; + smp = (struct ib_smp *) data; + if ((hdr->lrh[1] == IB_LID_PERMISSIVE || + hdr->lrh[3] == IB_LID_PERMISSIVE) && + smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + goto drop; + } + + /* + * The opcode is in the low byte when its in network order + * (top byte when in host order). + */ + opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + if (qp->ibqp.qp_num > 1 && + opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { + wc.ex.imm_data = ohdr->u.ud.imm_data; + wc.wc_flags = IB_WC_WITH_IMM; + tlen -= sizeof(u32); + } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { + wc.ex.imm_data = 0; + wc.wc_flags = 0; + } else + goto drop; + + /* + * A GRH is expected to precede the data even if not + * present on the wire. + */ + wc.byte_len = tlen + sizeof(struct ib_grh); + + /* + * Get the next work request entry to find where to put the data. + */ + if (qp->r_flags & QIB_R_REUSE_SGE) + qp->r_flags &= ~QIB_R_REUSE_SGE; + else { + int ret; + + ret = qib_get_rwqe(qp, 0); + if (ret < 0) { + qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); + return; + } + if (!ret) { + if (qp->ibqp.qp_num == 0) + ibp->n_vl15_dropped++; + return; + } + } + /* Silently drop packets which are too big. */ + if (unlikely(wc.byte_len > qp->r_len)) { + qp->r_flags |= QIB_R_REUSE_SGE; + goto drop; + } + if (has_grh) { + qib_copy_sge(&qp->r_sge, &hdr->u.l.grh, + sizeof(struct ib_grh), 1); + wc.wc_flags |= IB_WC_GRH; + } else + qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); + qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } + if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + return; + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; + wc.opcode = IB_WC_RECV; + wc.vendor_err = 0; + wc.qp = &qp->ibqp; + wc.src_qp = src_qp; + wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ? + qib_lookup_pkey(ibp, be32_to_cpu(ohdr->bth[0])) : 0; + wc.slid = be16_to_cpu(hdr->lrh[3]); + wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF; + dlid = be16_to_cpu(hdr->lrh[1]); + /* + * Save the LMC lower bits if the destination LID is a unicast LID. + */ + wc.dlid_path_bits = dlid >= QIB_MULTICAST_LID_BASE ? 0 : + dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1); + wc.port_num = qp->port_num; + /* Signal completion event if the solicited bit is set. */ + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + (ohdr->bth[0] & + cpu_to_be32(IB_BTH_SOLICITED)) != 0); + return; + +drop: + ibp->n_pkt_drops++; +} diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c new file mode 100644 index 00000000..2bc1d2b9 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "qib.h" + +static void __qib_release_user_pages(struct page **p, size_t num_pages, + int dirty) +{ + size_t i; + + for (i = 0; i < num_pages; i++) { + if (dirty) + set_page_dirty_lock(p[i]); + put_page(p[i]); + } +} + +/* + * Call with current->mm->mmap_sem held. + */ +static int __qib_get_user_pages(unsigned long start_page, size_t num_pages, + struct page **p, struct vm_area_struct **vma) +{ + unsigned long lock_limit; + size_t got; + int ret; + + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + if (num_pages > lock_limit && !capable(CAP_IPC_LOCK)) { + ret = -ENOMEM; + goto bail; + } + + for (got = 0; got < num_pages; got += ret) { + ret = get_user_pages(current, current->mm, + start_page + got * PAGE_SIZE, + num_pages - got, 1, 1, + p + got, vma); + if (ret < 0) + goto bail_release; + } + + current->mm->pinned_vm += num_pages; + + ret = 0; + goto bail; + +bail_release: + __qib_release_user_pages(p, got, 0); +bail: + return ret; +} + +/** + * qib_map_page - a safety wrapper around pci_map_page() + * + * A dma_addr of all 0's is interpreted by the chip as "disabled". + * Unfortunately, it can also be a valid dma_addr returned on some + * architectures. + * + * The powerpc iommu assigns dma_addrs in ascending order, so we don't + * have to bother with retries or mapping a dummy page to insure we + * don't just get the same mapping again. + * + * I'm sure we won't be so lucky with other iommu's, so FIXME. + */ +dma_addr_t qib_map_page(struct pci_dev *hwdev, struct page *page, + unsigned long offset, size_t size, int direction) +{ + dma_addr_t phys; + + phys = pci_map_page(hwdev, page, offset, size, direction); + + if (phys == 0) { + pci_unmap_page(hwdev, phys, size, direction); + phys = pci_map_page(hwdev, page, offset, size, direction); + /* + * FIXME: If we get 0 again, we should keep this page, + * map another, then free the 0 page. + */ + } + + return phys; +} + +/** + * qib_get_user_pages - lock user pages into memory + * @start_page: the start page + * @num_pages: the number of pages + * @p: the output page structures + * + * This function takes a given start page (page aligned user virtual + * address) and pins it and the following specified number of pages. For + * now, num_pages is always 1, but that will probably change at some point + * (because caller is doing expected sends on a single virtually contiguous + * buffer, so we can do all pages at once). + */ +int qib_get_user_pages(unsigned long start_page, size_t num_pages, + struct page **p) +{ + int ret; + + down_write(¤t->mm->mmap_sem); + + ret = __qib_get_user_pages(start_page, num_pages, p, NULL); + + up_write(¤t->mm->mmap_sem); + + return ret; +} + +void qib_release_user_pages(struct page **p, size_t num_pages) +{ + if (current->mm) /* during close after signal, mm can be NULL */ + down_write(¤t->mm->mmap_sem); + + __qib_release_user_pages(p, num_pages, 1); + + if (current->mm) { + current->mm->pinned_vm -= num_pages; + up_write(¤t->mm->mmap_sem); + } +} diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c new file mode 100644 index 00000000..82442085 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -0,0 +1,898 @@ +/* + * Copyright (c) 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "qib.h" +#include "qib_user_sdma.h" + +/* minimum size of header */ +#define QIB_USER_SDMA_MIN_HEADER_LENGTH 64 +/* expected size of headers (for dma_pool) */ +#define QIB_USER_SDMA_EXP_HEADER_LENGTH 64 +/* attempt to drain the queue for 5secs */ +#define QIB_USER_SDMA_DRAIN_TIMEOUT 500 + +struct qib_user_sdma_pkt { + u8 naddr; /* dimension of addr (1..3) ... */ + u32 counter; /* sdma pkts queued counter for this entry */ + u64 added; /* global descq number of entries */ + + struct { + u32 offset; /* offset for kvaddr, addr */ + u32 length; /* length in page */ + u8 put_page; /* should we put_page? */ + u8 dma_mapped; /* is page dma_mapped? */ + struct page *page; /* may be NULL (coherent mem) */ + void *kvaddr; /* FIXME: only for pio hack */ + dma_addr_t addr; + } addr[4]; /* max pages, any more and we coalesce */ + struct list_head list; /* list element */ +}; + +struct qib_user_sdma_queue { + /* + * pkts sent to dma engine are queued on this + * list head. the type of the elements of this + * list are struct qib_user_sdma_pkt... + */ + struct list_head sent; + + /* headers with expected length are allocated from here... */ + char header_cache_name[64]; + struct dma_pool *header_cache; + + /* packets are allocated from the slab cache... */ + char pkt_slab_name[64]; + struct kmem_cache *pkt_slab; + + /* as packets go on the queued queue, they are counted... */ + u32 counter; + u32 sent_counter; + + /* dma page table */ + struct rb_root dma_pages_root; + + /* protect everything above... */ + struct mutex lock; +}; + +struct qib_user_sdma_queue * +qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) +{ + struct qib_user_sdma_queue *pq = + kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL); + + if (!pq) + goto done; + + pq->counter = 0; + pq->sent_counter = 0; + INIT_LIST_HEAD(&pq->sent); + + mutex_init(&pq->lock); + + snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name), + "qib-user-sdma-pkts-%u-%02u.%02u", unit, ctxt, sctxt); + pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name, + sizeof(struct qib_user_sdma_pkt), + 0, 0, NULL); + + if (!pq->pkt_slab) + goto err_kfree; + + snprintf(pq->header_cache_name, sizeof(pq->header_cache_name), + "qib-user-sdma-headers-%u-%02u.%02u", unit, ctxt, sctxt); + pq->header_cache = dma_pool_create(pq->header_cache_name, + dev, + QIB_USER_SDMA_EXP_HEADER_LENGTH, + 4, 0); + if (!pq->header_cache) + goto err_slab; + + pq->dma_pages_root = RB_ROOT; + + goto done; + +err_slab: + kmem_cache_destroy(pq->pkt_slab); +err_kfree: + kfree(pq); + pq = NULL; + +done: + return pq; +} + +static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt, + int i, size_t offset, size_t len, + int put_page, int dma_mapped, + struct page *page, + void *kvaddr, dma_addr_t dma_addr) +{ + pkt->addr[i].offset = offset; + pkt->addr[i].length = len; + pkt->addr[i].put_page = put_page; + pkt->addr[i].dma_mapped = dma_mapped; + pkt->addr[i].page = page; + pkt->addr[i].kvaddr = kvaddr; + pkt->addr[i].addr = dma_addr; +} + +static void qib_user_sdma_init_header(struct qib_user_sdma_pkt *pkt, + u32 counter, size_t offset, + size_t len, int dma_mapped, + struct page *page, + void *kvaddr, dma_addr_t dma_addr) +{ + pkt->naddr = 1; + pkt->counter = counter; + qib_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page, + kvaddr, dma_addr); +} + +/* we've too many pages in the iovec, coalesce to a single page */ +static int qib_user_sdma_coalesce(const struct qib_devdata *dd, + struct qib_user_sdma_pkt *pkt, + const struct iovec *iov, + unsigned long niov) +{ + int ret = 0; + struct page *page = alloc_page(GFP_KERNEL); + void *mpage_save; + char *mpage; + int i; + int len = 0; + dma_addr_t dma_addr; + + if (!page) { + ret = -ENOMEM; + goto done; + } + + mpage = kmap(page); + mpage_save = mpage; + for (i = 0; i < niov; i++) { + int cfur; + + cfur = copy_from_user(mpage, + iov[i].iov_base, iov[i].iov_len); + if (cfur) { + ret = -EFAULT; + goto free_unmap; + } + + mpage += iov[i].iov_len; + len += iov[i].iov_len; + } + + dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, + DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + ret = -ENOMEM; + goto free_unmap; + } + + qib_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save, + dma_addr); + pkt->naddr = 2; + + goto done; + +free_unmap: + kunmap(page); + __free_page(page); +done: + return ret; +} + +/* + * How many pages in this iovec element? + */ +static int qib_user_sdma_num_pages(const struct iovec *iov) +{ + const unsigned long addr = (unsigned long) iov->iov_base; + const unsigned long len = iov->iov_len; + const unsigned long spage = addr & PAGE_MASK; + const unsigned long epage = (addr + len - 1) & PAGE_MASK; + + return 1 + ((epage - spage) >> PAGE_SHIFT); +} + +/* + * Truncate length to page boundary. + */ +static int qib_user_sdma_page_length(unsigned long addr, unsigned long len) +{ + const unsigned long offset = addr & ~PAGE_MASK; + + return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len; +} + +static void qib_user_sdma_free_pkt_frag(struct device *dev, + struct qib_user_sdma_queue *pq, + struct qib_user_sdma_pkt *pkt, + int frag) +{ + const int i = frag; + + if (pkt->addr[i].page) { + if (pkt->addr[i].dma_mapped) + dma_unmap_page(dev, + pkt->addr[i].addr, + pkt->addr[i].length, + DMA_TO_DEVICE); + + if (pkt->addr[i].kvaddr) + kunmap(pkt->addr[i].page); + + if (pkt->addr[i].put_page) + put_page(pkt->addr[i].page); + else + __free_page(pkt->addr[i].page); + } else if (pkt->addr[i].kvaddr) + /* free coherent mem from cache... */ + dma_pool_free(pq->header_cache, + pkt->addr[i].kvaddr, pkt->addr[i].addr); +} + +/* return number of pages pinned... */ +static int qib_user_sdma_pin_pages(const struct qib_devdata *dd, + struct qib_user_sdma_pkt *pkt, + unsigned long addr, int tlen, int npages) +{ + struct page *pages[2]; + int j; + int ret; + + ret = get_user_pages(current, current->mm, addr, + npages, 0, 1, pages, NULL); + + if (ret != npages) { + int i; + + for (i = 0; i < ret; i++) + put_page(pages[i]); + + ret = -ENOMEM; + goto done; + } + + for (j = 0; j < npages; j++) { + /* map the pages... */ + const int flen = qib_user_sdma_page_length(addr, tlen); + dma_addr_t dma_addr = + dma_map_page(&dd->pcidev->dev, + pages[j], 0, flen, DMA_TO_DEVICE); + unsigned long fofs = addr & ~PAGE_MASK; + + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + ret = -ENOMEM; + goto done; + } + + qib_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1, + pages[j], kmap(pages[j]), dma_addr); + + pkt->naddr++; + addr += flen; + tlen -= flen; + } + +done: + return ret; +} + +static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, + struct qib_user_sdma_pkt *pkt, + const struct iovec *iov, + unsigned long niov) +{ + int ret = 0; + unsigned long idx; + + for (idx = 0; idx < niov; idx++) { + const int npages = qib_user_sdma_num_pages(iov + idx); + const unsigned long addr = (unsigned long) iov[idx].iov_base; + + ret = qib_user_sdma_pin_pages(dd, pkt, addr, + iov[idx].iov_len, npages); + if (ret < 0) + goto free_pkt; + } + + goto done; + +free_pkt: + for (idx = 0; idx < pkt->naddr; idx++) + qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx); + +done: + return ret; +} + +static int qib_user_sdma_init_payload(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, + struct qib_user_sdma_pkt *pkt, + const struct iovec *iov, + unsigned long niov, int npages) +{ + int ret = 0; + + if (npages >= ARRAY_SIZE(pkt->addr)) + ret = qib_user_sdma_coalesce(dd, pkt, iov, niov); + else + ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov); + + return ret; +} + +/* free a packet list -- return counter value of last packet */ +static void qib_user_sdma_free_pkt_list(struct device *dev, + struct qib_user_sdma_queue *pq, + struct list_head *list) +{ + struct qib_user_sdma_pkt *pkt, *pkt_next; + + list_for_each_entry_safe(pkt, pkt_next, list, list) { + int i; + + for (i = 0; i < pkt->naddr; i++) + qib_user_sdma_free_pkt_frag(dev, pq, pkt, i); + + kmem_cache_free(pq->pkt_slab, pkt); + } + INIT_LIST_HEAD(list); +} + +/* + * copy headers, coalesce etc -- pq->lock must be held + * + * we queue all the packets to list, returning the + * number of bytes total. list must be empty initially, + * as, if there is an error we clean it... + */ +static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, + struct list_head *list, + const struct iovec *iov, + unsigned long niov, + int maxpkts) +{ + unsigned long idx = 0; + int ret = 0; + int npkts = 0; + struct page *page = NULL; + __le32 *pbc; + dma_addr_t dma_addr; + struct qib_user_sdma_pkt *pkt = NULL; + size_t len; + size_t nw; + u32 counter = pq->counter; + int dma_mapped = 0; + + while (idx < niov && npkts < maxpkts) { + const unsigned long addr = (unsigned long) iov[idx].iov_base; + const unsigned long idx_save = idx; + unsigned pktnw; + unsigned pktnwc; + int nfrags = 0; + int npages = 0; + int cfur; + + dma_mapped = 0; + len = iov[idx].iov_len; + nw = len >> 2; + page = NULL; + + pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); + if (!pkt) { + ret = -ENOMEM; + goto free_list; + } + + if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH || + len > PAGE_SIZE || len & 3 || addr & 3) { + ret = -EINVAL; + goto free_pkt; + } + + if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH) + pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL, + &dma_addr); + else + pbc = NULL; + + if (!pbc) { + page = alloc_page(GFP_KERNEL); + if (!page) { + ret = -ENOMEM; + goto free_pkt; + } + pbc = kmap(page); + } + + cfur = copy_from_user(pbc, iov[idx].iov_base, len); + if (cfur) { + ret = -EFAULT; + goto free_pbc; + } + + /* + * This assignment is a bit strange. it's because the + * the pbc counts the number of 32 bit words in the full + * packet _except_ the first word of the pbc itself... + */ + pktnwc = nw - 1; + + /* + * pktnw computation yields the number of 32 bit words + * that the caller has indicated in the PBC. note that + * this is one less than the total number of words that + * goes to the send DMA engine as the first 32 bit word + * of the PBC itself is not counted. Armed with this count, + * we can verify that the packet is consistent with the + * iovec lengths. + */ + pktnw = le32_to_cpu(*pbc) & QIB_PBC_LENGTH_MASK; + if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) { + ret = -EINVAL; + goto free_pbc; + } + + idx++; + while (pktnwc < pktnw && idx < niov) { + const size_t slen = iov[idx].iov_len; + const unsigned long faddr = + (unsigned long) iov[idx].iov_base; + + if (slen & 3 || faddr & 3 || !slen || + slen > PAGE_SIZE) { + ret = -EINVAL; + goto free_pbc; + } + + npages++; + if ((faddr & PAGE_MASK) != + ((faddr + slen - 1) & PAGE_MASK)) + npages++; + + pktnwc += slen >> 2; + idx++; + nfrags++; + } + + if (pktnwc != pktnw) { + ret = -EINVAL; + goto free_pbc; + } + + if (page) { + dma_addr = dma_map_page(&dd->pcidev->dev, + page, 0, len, DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + ret = -ENOMEM; + goto free_pbc; + } + + dma_mapped = 1; + } + + qib_user_sdma_init_header(pkt, counter, 0, len, dma_mapped, + page, pbc, dma_addr); + + if (nfrags) { + ret = qib_user_sdma_init_payload(dd, pq, pkt, + iov + idx_save + 1, + nfrags, npages); + if (ret < 0) + goto free_pbc_dma; + } + + counter++; + npkts++; + + list_add_tail(&pkt->list, list); + } + + ret = idx; + goto done; + +free_pbc_dma: + if (dma_mapped) + dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE); +free_pbc: + if (page) { + kunmap(page); + __free_page(page); + } else + dma_pool_free(pq->header_cache, pbc, dma_addr); +free_pkt: + kmem_cache_free(pq->pkt_slab, pkt); +free_list: + qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list); +done: + return ret; +} + +static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq, + u32 c) +{ + pq->sent_counter = c; +} + +/* try to clean out queue -- needs pq->lock */ +static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq) +{ + struct qib_devdata *dd = ppd->dd; + struct list_head free_list; + struct qib_user_sdma_pkt *pkt; + struct qib_user_sdma_pkt *pkt_prev; + int ret = 0; + + INIT_LIST_HEAD(&free_list); + + list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) { + s64 descd = ppd->sdma_descq_removed - pkt->added; + + if (descd < 0) + break; + + list_move_tail(&pkt->list, &free_list); + + /* one more packet cleaned */ + ret++; + } + + if (!list_empty(&free_list)) { + u32 counter; + + pkt = list_entry(free_list.prev, + struct qib_user_sdma_pkt, list); + counter = pkt->counter; + + qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list); + qib_user_sdma_set_complete_counter(pq, counter); + } + + return ret; +} + +void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq) +{ + if (!pq) + return; + + kmem_cache_destroy(pq->pkt_slab); + dma_pool_destroy(pq->header_cache); + kfree(pq); +} + +/* clean descriptor queue, returns > 0 if some elements cleaned */ +static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd) +{ + int ret; + unsigned long flags; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + ret = qib_sdma_make_progress(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + + return ret; +} + +/* we're in close, drain packets so that we can cleanup successfully... */ +void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq) +{ + struct qib_devdata *dd = ppd->dd; + int i; + + if (!pq) + return; + + for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) { + mutex_lock(&pq->lock); + if (list_empty(&pq->sent)) { + mutex_unlock(&pq->lock); + break; + } + qib_user_sdma_hwqueue_clean(ppd); + qib_user_sdma_queue_clean(ppd, pq); + mutex_unlock(&pq->lock); + msleep(10); + } + + if (!list_empty(&pq->sent)) { + struct list_head free_list; + + qib_dev_err(dd, "user sdma lists not empty: forcing!\n"); + INIT_LIST_HEAD(&free_list); + mutex_lock(&pq->lock); + list_splice_init(&pq->sent, &free_list); + qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list); + mutex_unlock(&pq->lock); + } +} + +static inline __le64 qib_sdma_make_desc0(struct qib_pportdata *ppd, + u64 addr, u64 dwlen, u64 dwoffset) +{ + u8 tmpgen; + + tmpgen = ppd->sdma_generation; + + return cpu_to_le64(/* SDmaPhyAddr[31:0] */ + ((addr & 0xfffffffcULL) << 32) | + /* SDmaGeneration[1:0] */ + ((tmpgen & 3ULL) << 30) | + /* SDmaDwordCount[10:0] */ + ((dwlen & 0x7ffULL) << 16) | + /* SDmaBufOffset[12:2] */ + (dwoffset & 0x7ffULL)); +} + +static inline __le64 qib_sdma_make_first_desc0(__le64 descq) +{ + return descq | cpu_to_le64(1ULL << 12); +} + +static inline __le64 qib_sdma_make_last_desc0(__le64 descq) +{ + /* last */ /* dma head */ + return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13); +} + +static inline __le64 qib_sdma_make_desc1(u64 addr) +{ + /* SDmaPhyAddr[47:32] */ + return cpu_to_le64(addr >> 32); +} + +static void qib_user_sdma_send_frag(struct qib_pportdata *ppd, + struct qib_user_sdma_pkt *pkt, int idx, + unsigned ofs, u16 tail) +{ + const u64 addr = (u64) pkt->addr[idx].addr + + (u64) pkt->addr[idx].offset; + const u64 dwlen = (u64) pkt->addr[idx].length / 4; + __le64 *descqp; + __le64 descq0; + + descqp = &ppd->sdma_descq[tail].qw[0]; + + descq0 = qib_sdma_make_desc0(ppd, addr, dwlen, ofs); + if (idx == 0) + descq0 = qib_sdma_make_first_desc0(descq0); + if (idx == pkt->naddr - 1) + descq0 = qib_sdma_make_last_desc0(descq0); + + descqp[0] = descq0; + descqp[1] = qib_sdma_make_desc1(addr); +} + +/* pq->lock must be held, get packets on the wire... */ +static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq, + struct list_head *pktlist) +{ + struct qib_devdata *dd = ppd->dd; + int ret = 0; + unsigned long flags; + u16 tail; + u8 generation; + u64 descq_added; + + if (list_empty(pktlist)) + return 0; + + if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE))) + return -ECOMM; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + + /* keep a copy for restoring purposes in case of problems */ + generation = ppd->sdma_generation; + descq_added = ppd->sdma_descq_added; + + if (unlikely(!__qib_sdma_running(ppd))) { + ret = -ECOMM; + goto unlock; + } + + tail = ppd->sdma_descq_tail; + while (!list_empty(pktlist)) { + struct qib_user_sdma_pkt *pkt = + list_entry(pktlist->next, struct qib_user_sdma_pkt, + list); + int i; + unsigned ofs = 0; + u16 dtail = tail; + + if (pkt->naddr > qib_sdma_descq_freecnt(ppd)) + goto unlock_check_tail; + + for (i = 0; i < pkt->naddr; i++) { + qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail); + ofs += pkt->addr[i].length >> 2; + + if (++tail == ppd->sdma_descq_cnt) { + tail = 0; + ++ppd->sdma_generation; + } + } + + if ((ofs << 2) > ppd->ibmaxlen) { + ret = -EMSGSIZE; + goto unlock; + } + + /* + * If the packet is >= 2KB mtu equivalent, we have to use + * the large buffers, and have to mark each descriptor as + * part of a large buffer packet. + */ + if (ofs > dd->piosize2kmax_dwords) { + for (i = 0; i < pkt->naddr; i++) { + ppd->sdma_descq[dtail].qw[0] |= + cpu_to_le64(1ULL << 14); + if (++dtail == ppd->sdma_descq_cnt) + dtail = 0; + } + } + + ppd->sdma_descq_added += pkt->naddr; + pkt->added = ppd->sdma_descq_added; + list_move_tail(&pkt->list, &pq->sent); + ret++; + } + +unlock_check_tail: + /* advance the tail on the chip if necessary */ + if (ppd->sdma_descq_tail != tail) + dd->f_sdma_update_tail(ppd, tail); + +unlock: + if (unlikely(ret < 0)) { + ppd->sdma_generation = generation; + ppd->sdma_descq_added = descq_added; + } + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + + return ret; +} + +int qib_user_sdma_writev(struct qib_ctxtdata *rcd, + struct qib_user_sdma_queue *pq, + const struct iovec *iov, + unsigned long dim) +{ + struct qib_devdata *dd = rcd->dd; + struct qib_pportdata *ppd = rcd->ppd; + int ret = 0; + struct list_head list; + int npkts = 0; + + INIT_LIST_HEAD(&list); + + mutex_lock(&pq->lock); + + /* why not -ECOMM like qib_user_sdma_push_pkts() below? */ + if (!qib_sdma_running(ppd)) + goto done_unlock; + + if (ppd->sdma_descq_added != ppd->sdma_descq_removed) { + qib_user_sdma_hwqueue_clean(ppd); + qib_user_sdma_queue_clean(ppd, pq); + } + + while (dim) { + const int mxp = 8; + + down_write(¤t->mm->mmap_sem); + ret = qib_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp); + up_write(¤t->mm->mmap_sem); + + if (ret <= 0) + goto done_unlock; + else { + dim -= ret; + iov += ret; + } + + /* force packets onto the sdma hw queue... */ + if (!list_empty(&list)) { + /* + * Lazily clean hw queue. the 4 is a guess of about + * how many sdma descriptors a packet will take (it + * doesn't have to be perfect). + */ + if (qib_sdma_descq_freecnt(ppd) < ret * 4) { + qib_user_sdma_hwqueue_clean(ppd); + qib_user_sdma_queue_clean(ppd, pq); + } + + ret = qib_user_sdma_push_pkts(ppd, pq, &list); + if (ret < 0) + goto done_unlock; + else { + npkts += ret; + pq->counter += ret; + + if (!list_empty(&list)) + goto done_unlock; + } + } + } + +done_unlock: + if (!list_empty(&list)) + qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list); + mutex_unlock(&pq->lock); + + return (ret < 0) ? ret : npkts; +} + +int qib_user_sdma_make_progress(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq) +{ + int ret = 0; + + mutex_lock(&pq->lock); + qib_user_sdma_hwqueue_clean(ppd); + ret = qib_user_sdma_queue_clean(ppd, pq); + mutex_unlock(&pq->lock); + + return ret; +} + +u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq) +{ + return pq ? pq->sent_counter : 0; +} + +u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq) +{ + return pq ? pq->counter : 0; +} diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.h b/drivers/infiniband/hw/qib/qib_user_sdma.h new file mode 100644 index 00000000..ce8cbaf6 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_user_sdma.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include + +struct qib_user_sdma_queue; + +struct qib_user_sdma_queue * +qib_user_sdma_queue_create(struct device *dev, int unit, int port, int sport); +void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq); + +int qib_user_sdma_writev(struct qib_ctxtdata *pd, + struct qib_user_sdma_queue *pq, + const struct iovec *iov, + unsigned long dim); + +int qib_user_sdma_make_progress(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq); + +void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq); + +u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq); +u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c new file mode 100644 index 00000000..7b6c3bff --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -0,0 +1,2291 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "qib.h" +#include "qib_common.h" + +static unsigned int ib_qib_qp_table_size = 256; +module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO); +MODULE_PARM_DESC(qp_table_size, "QP table size"); + +unsigned int ib_qib_lkey_table_size = 16; +module_param_named(lkey_table_size, ib_qib_lkey_table_size, uint, + S_IRUGO); +MODULE_PARM_DESC(lkey_table_size, + "LKEY table size in bits (2^n, 1 <= n <= 23)"); + +static unsigned int ib_qib_max_pds = 0xFFFF; +module_param_named(max_pds, ib_qib_max_pds, uint, S_IRUGO); +MODULE_PARM_DESC(max_pds, + "Maximum number of protection domains to support"); + +static unsigned int ib_qib_max_ahs = 0xFFFF; +module_param_named(max_ahs, ib_qib_max_ahs, uint, S_IRUGO); +MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support"); + +unsigned int ib_qib_max_cqes = 0x2FFFF; +module_param_named(max_cqes, ib_qib_max_cqes, uint, S_IRUGO); +MODULE_PARM_DESC(max_cqes, + "Maximum number of completion queue entries to support"); + +unsigned int ib_qib_max_cqs = 0x1FFFF; +module_param_named(max_cqs, ib_qib_max_cqs, uint, S_IRUGO); +MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support"); + +unsigned int ib_qib_max_qp_wrs = 0x3FFF; +module_param_named(max_qp_wrs, ib_qib_max_qp_wrs, uint, S_IRUGO); +MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); + +unsigned int ib_qib_max_qps = 16384; +module_param_named(max_qps, ib_qib_max_qps, uint, S_IRUGO); +MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); + +unsigned int ib_qib_max_sges = 0x60; +module_param_named(max_sges, ib_qib_max_sges, uint, S_IRUGO); +MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support"); + +unsigned int ib_qib_max_mcast_grps = 16384; +module_param_named(max_mcast_grps, ib_qib_max_mcast_grps, uint, S_IRUGO); +MODULE_PARM_DESC(max_mcast_grps, + "Maximum number of multicast groups to support"); + +unsigned int ib_qib_max_mcast_qp_attached = 16; +module_param_named(max_mcast_qp_attached, ib_qib_max_mcast_qp_attached, + uint, S_IRUGO); +MODULE_PARM_DESC(max_mcast_qp_attached, + "Maximum number of attached QPs to support"); + +unsigned int ib_qib_max_srqs = 1024; +module_param_named(max_srqs, ib_qib_max_srqs, uint, S_IRUGO); +MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support"); + +unsigned int ib_qib_max_srq_sges = 128; +module_param_named(max_srq_sges, ib_qib_max_srq_sges, uint, S_IRUGO); +MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support"); + +unsigned int ib_qib_max_srq_wrs = 0x1FFFF; +module_param_named(max_srq_wrs, ib_qib_max_srq_wrs, uint, S_IRUGO); +MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); + +static unsigned int ib_qib_disable_sma; +module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(disable_sma, "Disable the SMA"); + +/* + * Note that it is OK to post send work requests in the SQE and ERR + * states; qib_do_send() will process them and generate error + * completions as per IB 1.2 C10-96. + */ +const int ib_qib_state_ops[IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = 0, + [IB_QPS_INIT] = QIB_POST_RECV_OK, + [IB_QPS_RTR] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK, + [IB_QPS_RTS] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK | + QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK | + QIB_PROCESS_NEXT_SEND_OK, + [IB_QPS_SQD] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK | + QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK, + [IB_QPS_SQE] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK | + QIB_POST_SEND_OK | QIB_FLUSH_SEND, + [IB_QPS_ERR] = QIB_POST_RECV_OK | QIB_FLUSH_RECV | + QIB_POST_SEND_OK | QIB_FLUSH_SEND, +}; + +struct qib_ucontext { + struct ib_ucontext ibucontext; +}; + +static inline struct qib_ucontext *to_iucontext(struct ib_ucontext + *ibucontext) +{ + return container_of(ibucontext, struct qib_ucontext, ibucontext); +} + +/* + * Translate ib_wr_opcode into ib_wc_opcode. + */ +const enum ib_wc_opcode ib_qib_wc_opcode[] = { + [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, + [IB_WR_SEND] = IB_WC_SEND, + [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, + [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, + [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD +}; + +/* + * System image GUID. + */ +__be64 ib_qib_sys_image_guid; + +/** + * qib_copy_sge - copy data to SGE memory + * @ss: the SGE state + * @data: the data to copy + * @length: the length of the data + */ +void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release) +{ + struct qib_sge *sge = &ss->sge; + + while (length) { + u32 len = sge->length; + + if (len > length) + len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + memcpy(sge->vaddr, data, len); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (release) + atomic_dec(&sge->mr->refcount); + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { + if (++sge->n >= QIB_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + data += len; + length -= len; + } +} + +/** + * qib_skip_sge - skip over SGE memory - XXX almost dup of prev func + * @ss: the SGE state + * @length: the number of bytes to skip + */ +void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release) +{ + struct qib_sge *sge = &ss->sge; + + while (length) { + u32 len = sge->length; + + if (len > length) + len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (release) + atomic_dec(&sge->mr->refcount); + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { + if (++sge->n >= QIB_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + length -= len; + } +} + +/* + * Count the number of DMA descriptors needed to send length bytes of data. + * Don't modify the qib_sge_state to get the count. + * Return zero if any of the segments is not aligned. + */ +static u32 qib_count_sge(struct qib_sge_state *ss, u32 length) +{ + struct qib_sge *sg_list = ss->sg_list; + struct qib_sge sge = ss->sge; + u8 num_sge = ss->num_sge; + u32 ndesc = 1; /* count the header */ + + while (length) { + u32 len = sge.length; + + if (len > length) + len = length; + if (len > sge.sge_length) + len = sge.sge_length; + BUG_ON(len == 0); + if (((long) sge.vaddr & (sizeof(u32) - 1)) || + (len != length && (len & (sizeof(u32) - 1)))) { + ndesc = 0; + break; + } + ndesc++; + sge.vaddr += len; + sge.length -= len; + sge.sge_length -= len; + if (sge.sge_length == 0) { + if (--num_sge) + sge = *sg_list++; + } else if (sge.length == 0 && sge.mr->lkey) { + if (++sge.n >= QIB_SEGSZ) { + if (++sge.m >= sge.mr->mapsz) + break; + sge.n = 0; + } + sge.vaddr = + sge.mr->map[sge.m]->segs[sge.n].vaddr; + sge.length = + sge.mr->map[sge.m]->segs[sge.n].length; + } + length -= len; + } + return ndesc; +} + +/* + * Copy from the SGEs to the data buffer. + */ +static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length) +{ + struct qib_sge *sge = &ss->sge; + + while (length) { + u32 len = sge->length; + + if (len > length) + len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + memcpy(data, sge->vaddr, len); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { + if (++sge->n >= QIB_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + data += len; + length -= len; + } +} + +/** + * qib_post_one_send - post one RC, UC, or UD send work request + * @qp: the QP to post on + * @wr: the work request to send + */ +static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr) +{ + struct qib_swqe *wqe; + u32 next; + int i; + int j; + int acc; + int ret; + unsigned long flags; + struct qib_lkey_table *rkt; + struct qib_pd *pd; + + spin_lock_irqsave(&qp->s_lock, flags); + + /* Check that state is OK to post send. */ + if (unlikely(!(ib_qib_state_ops[qp->state] & QIB_POST_SEND_OK))) + goto bail_inval; + + /* IB spec says that num_sge == 0 is OK. */ + if (wr->num_sge > qp->s_max_sge) + goto bail_inval; + + /* + * Don't allow RDMA reads or atomic operations on UC or + * undefined operations. + * Make sure buffer is large enough to hold the result for atomics. + */ + if (wr->opcode == IB_WR_FAST_REG_MR) { + if (qib_fast_reg_mr(qp, wr)) + goto bail_inval; + } else if (qp->ibqp.qp_type == IB_QPT_UC) { + if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) + goto bail_inval; + } else if (qp->ibqp.qp_type != IB_QPT_RC) { + /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */ + if (wr->opcode != IB_WR_SEND && + wr->opcode != IB_WR_SEND_WITH_IMM) + goto bail_inval; + /* Check UD destination address PD */ + if (qp->ibqp.pd != wr->wr.ud.ah->pd) + goto bail_inval; + } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) + goto bail_inval; + else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && + (wr->num_sge == 0 || + wr->sg_list[0].length < sizeof(u64) || + wr->sg_list[0].addr & (sizeof(u64) - 1))) + goto bail_inval; + else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) + goto bail_inval; + + next = qp->s_head + 1; + if (next >= qp->s_size) + next = 0; + if (next == qp->s_last) { + ret = -ENOMEM; + goto bail; + } + + rkt = &to_idev(qp->ibqp.device)->lk_table; + pd = to_ipd(qp->ibqp.pd); + wqe = get_swqe_ptr(qp, qp->s_head); + wqe->wr = *wr; + wqe->length = 0; + j = 0; + if (wr->num_sge) { + acc = wr->opcode >= IB_WR_RDMA_READ ? + IB_ACCESS_LOCAL_WRITE : 0; + for (i = 0; i < wr->num_sge; i++) { + u32 length = wr->sg_list[i].length; + int ok; + + if (length == 0) + continue; + ok = qib_lkey_ok(rkt, pd, &wqe->sg_list[j], + &wr->sg_list[i], acc); + if (!ok) + goto bail_inval_free; + wqe->length += length; + j++; + } + wqe->wr.num_sge = j; + } + if (qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_RC) { + if (wqe->length > 0x80000000U) + goto bail_inval_free; + } else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport + + qp->port_num - 1)->ibmtu) + goto bail_inval_free; + else + atomic_inc(&to_iah(wr->wr.ud.ah)->refcount); + wqe->ssn = qp->s_ssn++; + qp->s_head = next; + + ret = 0; + goto bail; + +bail_inval_free: + while (j) { + struct qib_sge *sge = &wqe->sg_list[--j]; + + atomic_dec(&sge->mr->refcount); + } +bail_inval: + ret = -EINVAL; +bail: + spin_unlock_irqrestore(&qp->s_lock, flags); + return ret; +} + +/** + * qib_post_send - post a send on a QP + * @ibqp: the QP to post the send on + * @wr: the list of work requests to post + * @bad_wr: the first bad WR is put here + * + * This may be called from interrupt context. + */ +static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct qib_qp *qp = to_iqp(ibqp); + int err = 0; + + for (; wr; wr = wr->next) { + err = qib_post_one_send(qp, wr); + if (err) { + *bad_wr = wr; + goto bail; + } + } + + /* Try to do the send work in the caller's context. */ + qib_do_send(&qp->s_work); + +bail: + return err; +} + +/** + * qib_post_receive - post a receive on a QP + * @ibqp: the QP to post the receive on + * @wr: the WR to post + * @bad_wr: the first bad WR is put here + * + * This may be called from interrupt context. + */ +static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct qib_qp *qp = to_iqp(ibqp); + struct qib_rwq *wq = qp->r_rq.wq; + unsigned long flags; + int ret; + + /* Check that state is OK to post receive. */ + if (!(ib_qib_state_ops[qp->state] & QIB_POST_RECV_OK) || !wq) { + *bad_wr = wr; + ret = -EINVAL; + goto bail; + } + + for (; wr; wr = wr->next) { + struct qib_rwqe *wqe; + u32 next; + int i; + + if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { + *bad_wr = wr; + ret = -EINVAL; + goto bail; + } + + spin_lock_irqsave(&qp->r_rq.lock, flags); + next = wq->head + 1; + if (next >= qp->r_rq.size) + next = 0; + if (next == wq->tail) { + spin_unlock_irqrestore(&qp->r_rq.lock, flags); + *bad_wr = wr; + ret = -ENOMEM; + goto bail; + } + + wqe = get_rwqe_ptr(&qp->r_rq, wq->head); + wqe->wr_id = wr->wr_id; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); + wq->head = next; + spin_unlock_irqrestore(&qp->r_rq.lock, flags); + } + ret = 0; + +bail: + return ret; +} + +/** + * qib_qp_rcv - processing an incoming packet on a QP + * @rcd: the context pointer + * @hdr: the packet header + * @has_grh: true if the packet has a GRH + * @data: the packet data + * @tlen: the packet length + * @qp: the QP the packet came on + * + * This is called from qib_ib_rcv() to process an incoming packet + * for the given QP. + * Called at interrupt level. + */ +static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct qib_qp *qp) +{ + struct qib_ibport *ibp = &rcd->ppd->ibport_data; + + spin_lock(&qp->r_lock); + + /* Check for valid receive state. */ + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { + ibp->n_pkt_drops++; + goto unlock; + } + + switch (qp->ibqp.qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + if (ib_qib_disable_sma) + break; + /* FALLTHROUGH */ + case IB_QPT_UD: + qib_ud_rcv(ibp, hdr, has_grh, data, tlen, qp); + break; + + case IB_QPT_RC: + qib_rc_rcv(rcd, hdr, has_grh, data, tlen, qp); + break; + + case IB_QPT_UC: + qib_uc_rcv(ibp, hdr, has_grh, data, tlen, qp); + break; + + default: + break; + } + +unlock: + spin_unlock(&qp->r_lock); +} + +/** + * qib_ib_rcv - process an incoming packet + * @rcd: the context pointer + * @rhdr: the header of the packet + * @data: the packet payload + * @tlen: the packet length + * + * This is called from qib_kreceive() to process an incoming packet at + * interrupt level. Tlen is the length of the header + data + CRC in bytes. + */ +void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) +{ + struct qib_pportdata *ppd = rcd->ppd; + struct qib_ibport *ibp = &ppd->ibport_data; + struct qib_ib_header *hdr = rhdr; + struct qib_other_headers *ohdr; + struct qib_qp *qp; + u32 qp_num; + int lnh; + u8 opcode; + u16 lid; + + /* 24 == LRH+BTH+CRC */ + if (unlikely(tlen < 24)) + goto drop; + + /* Check for a valid destination LID (see ch. 7.11.1). */ + lid = be16_to_cpu(hdr->lrh[1]); + if (lid < QIB_MULTICAST_LID_BASE) { + lid &= ~((1 << ppd->lmc) - 1); + if (unlikely(lid != ppd->lid)) + goto drop; + } + + /* Check for GRH */ + lnh = be16_to_cpu(hdr->lrh[0]) & 3; + if (lnh == QIB_LRH_BTH) + ohdr = &hdr->u.oth; + else if (lnh == QIB_LRH_GRH) { + u32 vtf; + + ohdr = &hdr->u.l.oth; + if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) + goto drop; + vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow); + if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) + goto drop; + } else + goto drop; + + opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + ibp->opstats[opcode & 0x7f].n_bytes += tlen; + ibp->opstats[opcode & 0x7f].n_packets++; + + /* Get the destination QP number. */ + qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK; + if (qp_num == QIB_MULTICAST_QPN) { + struct qib_mcast *mcast; + struct qib_mcast_qp *p; + + if (lnh != QIB_LRH_GRH) + goto drop; + mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid); + if (mcast == NULL) + goto drop; + ibp->n_multicast_rcv++; + list_for_each_entry_rcu(p, &mcast->qp_list, list) + qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); + /* + * Notify qib_multicast_detach() if it is waiting for us + * to finish. + */ + if (atomic_dec_return(&mcast->refcount) <= 1) + wake_up(&mcast->wait); + } else { + if (rcd->lookaside_qp) { + if (rcd->lookaside_qpn != qp_num) { + if (atomic_dec_and_test( + &rcd->lookaside_qp->refcount)) + wake_up( + &rcd->lookaside_qp->wait); + rcd->lookaside_qp = NULL; + } + } + if (!rcd->lookaside_qp) { + qp = qib_lookup_qpn(ibp, qp_num); + if (!qp) + goto drop; + rcd->lookaside_qp = qp; + rcd->lookaside_qpn = qp_num; + } else + qp = rcd->lookaside_qp; + ibp->n_unicast_rcv++; + qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp); + } + return; + +drop: + ibp->n_pkt_drops++; +} + +/* + * This is called from a timer to check for QPs + * which need kernel memory in order to send a packet. + */ +static void mem_timer(unsigned long data) +{ + struct qib_ibdev *dev = (struct qib_ibdev *) data; + struct list_head *list = &dev->memwait; + struct qib_qp *qp = NULL; + unsigned long flags; + + spin_lock_irqsave(&dev->pending_lock, flags); + if (!list_empty(list)) { + qp = list_entry(list->next, struct qib_qp, iowait); + list_del_init(&qp->iowait); + atomic_inc(&qp->refcount); + if (!list_empty(list)) + mod_timer(&dev->mem_timer, jiffies + 1); + } + spin_unlock_irqrestore(&dev->pending_lock, flags); + + if (qp) { + spin_lock_irqsave(&qp->s_lock, flags); + if (qp->s_flags & QIB_S_WAIT_KMEM) { + qp->s_flags &= ~QIB_S_WAIT_KMEM; + qib_schedule_send(qp); + } + spin_unlock_irqrestore(&qp->s_lock, flags); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } +} + +static void update_sge(struct qib_sge_state *ss, u32 length) +{ + struct qib_sge *sge = &ss->sge; + + sge->vaddr += length; + sge->length -= length; + sge->sge_length -= length; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { + if (++sge->n >= QIB_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + return; + sge->n = 0; + } + sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = sge->mr->map[sge->m]->segs[sge->n].length; + } +} + +#ifdef __LITTLE_ENDIAN +static inline u32 get_upper_bits(u32 data, u32 shift) +{ + return data >> shift; +} + +static inline u32 set_upper_bits(u32 data, u32 shift) +{ + return data << shift; +} + +static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) +{ + data <<= ((sizeof(u32) - n) * BITS_PER_BYTE); + data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE); + return data; +} +#else +static inline u32 get_upper_bits(u32 data, u32 shift) +{ + return data << shift; +} + +static inline u32 set_upper_bits(u32 data, u32 shift) +{ + return data >> shift; +} + +static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) +{ + data >>= ((sizeof(u32) - n) * BITS_PER_BYTE); + data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE); + return data; +} +#endif + +static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss, + u32 length, unsigned flush_wc) +{ + u32 extra = 0; + u32 data = 0; + u32 last; + + while (1) { + u32 len = ss->sge.length; + u32 off; + + if (len > length) + len = length; + if (len > ss->sge.sge_length) + len = ss->sge.sge_length; + BUG_ON(len == 0); + /* If the source address is not aligned, try to align it. */ + off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); + if (off) { + u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr & + ~(sizeof(u32) - 1)); + u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE); + u32 y; + + y = sizeof(u32) - off; + if (len > y) + len = y; + if (len + extra >= sizeof(u32)) { + data |= set_upper_bits(v, extra * + BITS_PER_BYTE); + len = sizeof(u32) - extra; + if (len == length) { + last = data; + break; + } + __raw_writel(data, piobuf); + piobuf++; + extra = 0; + data = 0; + } else { + /* Clear unused upper bytes */ + data |= clear_upper_bytes(v, len, extra); + if (len == length) { + last = data; + break; + } + extra += len; + } + } else if (extra) { + /* Source address is aligned. */ + u32 *addr = (u32 *) ss->sge.vaddr; + int shift = extra * BITS_PER_BYTE; + int ushift = 32 - shift; + u32 l = len; + + while (l >= sizeof(u32)) { + u32 v = *addr; + + data |= set_upper_bits(v, shift); + __raw_writel(data, piobuf); + data = get_upper_bits(v, ushift); + piobuf++; + addr++; + l -= sizeof(u32); + } + /* + * We still have 'extra' number of bytes leftover. + */ + if (l) { + u32 v = *addr; + + if (l + extra >= sizeof(u32)) { + data |= set_upper_bits(v, shift); + len -= l + extra - sizeof(u32); + if (len == length) { + last = data; + break; + } + __raw_writel(data, piobuf); + piobuf++; + extra = 0; + data = 0; + } else { + /* Clear unused upper bytes */ + data |= clear_upper_bytes(v, l, extra); + if (len == length) { + last = data; + break; + } + extra += l; + } + } else if (len == length) { + last = data; + break; + } + } else if (len == length) { + u32 w; + + /* + * Need to round up for the last dword in the + * packet. + */ + w = (len + 3) >> 2; + qib_pio_copy(piobuf, ss->sge.vaddr, w - 1); + piobuf += w - 1; + last = ((u32 *) ss->sge.vaddr)[w - 1]; + break; + } else { + u32 w = len >> 2; + + qib_pio_copy(piobuf, ss->sge.vaddr, w); + piobuf += w; + + extra = len & (sizeof(u32) - 1); + if (extra) { + u32 v = ((u32 *) ss->sge.vaddr)[w]; + + /* Clear unused upper bytes */ + data = clear_upper_bytes(v, extra, 0); + } + } + update_sge(ss, len); + length -= len; + } + /* Update address before sending packet. */ + update_sge(ss, length); + if (flush_wc) { + /* must flush early everything before trigger word */ + qib_flush_wc(); + __raw_writel(last, piobuf); + /* be sure trigger word is written */ + qib_flush_wc(); + } else + __raw_writel(last, piobuf); +} + +static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev, + struct qib_qp *qp) +{ + struct qib_verbs_txreq *tx; + unsigned long flags; + + spin_lock_irqsave(&qp->s_lock, flags); + spin_lock(&dev->pending_lock); + + if (!list_empty(&dev->txreq_free)) { + struct list_head *l = dev->txreq_free.next; + + list_del(l); + spin_unlock(&dev->pending_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); + tx = list_entry(l, struct qib_verbs_txreq, txreq.list); + } else { + if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK && + list_empty(&qp->iowait)) { + dev->n_txwait++; + qp->s_flags |= QIB_S_WAIT_TX; + list_add_tail(&qp->iowait, &dev->txwait); + } + qp->s_flags &= ~QIB_S_BUSY; + spin_unlock(&dev->pending_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); + tx = ERR_PTR(-EBUSY); + } + return tx; +} + +static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev, + struct qib_qp *qp) +{ + struct qib_verbs_txreq *tx; + unsigned long flags; + + spin_lock_irqsave(&dev->pending_lock, flags); + /* assume the list non empty */ + if (likely(!list_empty(&dev->txreq_free))) { + struct list_head *l = dev->txreq_free.next; + + list_del(l); + spin_unlock_irqrestore(&dev->pending_lock, flags); + tx = list_entry(l, struct qib_verbs_txreq, txreq.list); + } else { + /* call slow path to get the extra lock */ + spin_unlock_irqrestore(&dev->pending_lock, flags); + tx = __get_txreq(dev, qp); + } + return tx; +} + +void qib_put_txreq(struct qib_verbs_txreq *tx) +{ + struct qib_ibdev *dev; + struct qib_qp *qp; + unsigned long flags; + + qp = tx->qp; + dev = to_idev(qp->ibqp.device); + + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + if (tx->mr) { + atomic_dec(&tx->mr->refcount); + tx->mr = NULL; + } + if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) { + tx->txreq.flags &= ~QIB_SDMA_TXREQ_F_FREEBUF; + dma_unmap_single(&dd_from_dev(dev)->pcidev->dev, + tx->txreq.addr, tx->hdr_dwords << 2, + DMA_TO_DEVICE); + kfree(tx->align_buf); + } + + spin_lock_irqsave(&dev->pending_lock, flags); + + /* Put struct back on free list */ + list_add(&tx->txreq.list, &dev->txreq_free); + + if (!list_empty(&dev->txwait)) { + /* Wake up first QP wanting a free struct */ + qp = list_entry(dev->txwait.next, struct qib_qp, iowait); + list_del_init(&qp->iowait); + atomic_inc(&qp->refcount); + spin_unlock_irqrestore(&dev->pending_lock, flags); + + spin_lock_irqsave(&qp->s_lock, flags); + if (qp->s_flags & QIB_S_WAIT_TX) { + qp->s_flags &= ~QIB_S_WAIT_TX; + qib_schedule_send(qp); + } + spin_unlock_irqrestore(&qp->s_lock, flags); + + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } else + spin_unlock_irqrestore(&dev->pending_lock, flags); +} + +/* + * This is called when there are send DMA descriptors that might be + * available. + * + * This is called with ppd->sdma_lock held. + */ +void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) +{ + struct qib_qp *qp, *nqp; + struct qib_qp *qps[20]; + struct qib_ibdev *dev; + unsigned i, n; + + n = 0; + dev = &ppd->dd->verbs_dev; + spin_lock(&dev->pending_lock); + + /* Search wait list for first QP wanting DMA descriptors. */ + list_for_each_entry_safe(qp, nqp, &dev->dmawait, iowait) { + if (qp->port_num != ppd->port) + continue; + if (n == ARRAY_SIZE(qps)) + break; + if (qp->s_tx->txreq.sg_count > avail) + break; + avail -= qp->s_tx->txreq.sg_count; + list_del_init(&qp->iowait); + atomic_inc(&qp->refcount); + qps[n++] = qp; + } + + spin_unlock(&dev->pending_lock); + + for (i = 0; i < n; i++) { + qp = qps[i]; + spin_lock(&qp->s_lock); + if (qp->s_flags & QIB_S_WAIT_DMA_DESC) { + qp->s_flags &= ~QIB_S_WAIT_DMA_DESC; + qib_schedule_send(qp); + } + spin_unlock(&qp->s_lock); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } +} + +/* + * This is called with ppd->sdma_lock held. + */ +static void sdma_complete(struct qib_sdma_txreq *cookie, int status) +{ + struct qib_verbs_txreq *tx = + container_of(cookie, struct qib_verbs_txreq, txreq); + struct qib_qp *qp = tx->qp; + + spin_lock(&qp->s_lock); + if (tx->wqe) + qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS); + else if (qp->ibqp.qp_type == IB_QPT_RC) { + struct qib_ib_header *hdr; + + if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) + hdr = &tx->align_buf->hdr; + else { + struct qib_ibdev *dev = to_idev(qp->ibqp.device); + + hdr = &dev->pio_hdrs[tx->hdr_inx].hdr; + } + qib_rc_send_complete(qp, hdr); + } + if (atomic_dec_and_test(&qp->s_dma_busy)) { + if (qp->state == IB_QPS_RESET) + wake_up(&qp->wait_dma); + else if (qp->s_flags & QIB_S_WAIT_DMA) { + qp->s_flags &= ~QIB_S_WAIT_DMA; + qib_schedule_send(qp); + } + } + spin_unlock(&qp->s_lock); + + qib_put_txreq(tx); +} + +static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { + spin_lock(&dev->pending_lock); + if (list_empty(&qp->iowait)) { + if (list_empty(&dev->memwait)) + mod_timer(&dev->mem_timer, jiffies + 1); + qp->s_flags |= QIB_S_WAIT_KMEM; + list_add_tail(&qp->iowait, &dev->memwait); + } + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~QIB_S_BUSY; + ret = -EBUSY; + } + spin_unlock_irqrestore(&qp->s_lock, flags); + + return ret; +} + +static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr, + u32 hdrwords, struct qib_sge_state *ss, u32 len, + u32 plen, u32 dwords) +{ + struct qib_ibdev *dev = to_idev(qp->ibqp.device); + struct qib_devdata *dd = dd_from_dev(dev); + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_verbs_txreq *tx; + struct qib_pio_header *phdr; + u32 control; + u32 ndesc; + int ret; + + tx = qp->s_tx; + if (tx) { + qp->s_tx = NULL; + /* resend previously constructed packet */ + ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx); + goto bail; + } + + tx = get_txreq(dev, qp); + if (IS_ERR(tx)) + goto bail_tx; + + control = dd->f_setpbc_control(ppd, plen, qp->s_srate, + be16_to_cpu(hdr->lrh[0]) >> 12); + tx->qp = qp; + atomic_inc(&qp->refcount); + tx->wqe = qp->s_wqe; + tx->mr = qp->s_rdma_mr; + if (qp->s_rdma_mr) + qp->s_rdma_mr = NULL; + tx->txreq.callback = sdma_complete; + if (dd->flags & QIB_HAS_SDMA_TIMEOUT) + tx->txreq.flags = QIB_SDMA_TXREQ_F_HEADTOHOST; + else + tx->txreq.flags = QIB_SDMA_TXREQ_F_INTREQ; + if (plen + 1 > dd->piosize2kmax_dwords) + tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF; + + if (len) { + /* + * Don't try to DMA if it takes more descriptors than + * the queue holds. + */ + ndesc = qib_count_sge(ss, len); + if (ndesc >= ppd->sdma_descq_cnt) + ndesc = 0; + } else + ndesc = 1; + if (ndesc) { + phdr = &dev->pio_hdrs[tx->hdr_inx]; + phdr->pbc[0] = cpu_to_le32(plen); + phdr->pbc[1] = cpu_to_le32(control); + memcpy(&phdr->hdr, hdr, hdrwords << 2); + tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEDESC; + tx->txreq.sg_count = ndesc; + tx->txreq.addr = dev->pio_hdrs_phys + + tx->hdr_inx * sizeof(struct qib_pio_header); + tx->hdr_dwords = hdrwords + 2; /* add PBC length */ + ret = qib_sdma_verbs_send(ppd, ss, dwords, tx); + goto bail; + } + + /* Allocate a buffer and copy the header and payload to it. */ + tx->hdr_dwords = plen + 1; + phdr = kmalloc(tx->hdr_dwords << 2, GFP_ATOMIC); + if (!phdr) + goto err_tx; + phdr->pbc[0] = cpu_to_le32(plen); + phdr->pbc[1] = cpu_to_le32(control); + memcpy(&phdr->hdr, hdr, hdrwords << 2); + qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len); + + tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr, + tx->hdr_dwords << 2, DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, tx->txreq.addr)) + goto map_err; + tx->align_buf = phdr; + tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF; + tx->txreq.sg_count = 1; + ret = qib_sdma_verbs_send(ppd, NULL, 0, tx); + goto unaligned; + +map_err: + kfree(phdr); +err_tx: + qib_put_txreq(tx); + ret = wait_kmem(dev, qp); +unaligned: + ibp->n_unaligned++; +bail: + return ret; +bail_tx: + ret = PTR_ERR(tx); + goto bail; +} + +/* + * If we are now in the error state, return zero to flush the + * send work request. + */ +static int no_bufs_available(struct qib_qp *qp) +{ + struct qib_ibdev *dev = to_idev(qp->ibqp.device); + struct qib_devdata *dd; + unsigned long flags; + int ret = 0; + + /* + * Note that as soon as want_buffer() is called and + * possibly before it returns, qib_ib_piobufavail() + * could be called. Therefore, put QP on the I/O wait list before + * enabling the PIO avail interrupt. + */ + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { + spin_lock(&dev->pending_lock); + if (list_empty(&qp->iowait)) { + dev->n_piowait++; + qp->s_flags |= QIB_S_WAIT_PIO; + list_add_tail(&qp->iowait, &dev->piowait); + dd = dd_from_dev(dev); + dd->f_wantpiobuf_intr(dd, 1); + } + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~QIB_S_BUSY; + ret = -EBUSY; + } + spin_unlock_irqrestore(&qp->s_lock, flags); + return ret; +} + +static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr, + u32 hdrwords, struct qib_sge_state *ss, u32 len, + u32 plen, u32 dwords) +{ + struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct qib_pportdata *ppd = dd->pport + qp->port_num - 1; + u32 *hdr = (u32 *) ibhdr; + u32 __iomem *piobuf_orig; + u32 __iomem *piobuf; + u64 pbc; + unsigned long flags; + unsigned flush_wc; + u32 control; + u32 pbufn; + + control = dd->f_setpbc_control(ppd, plen, qp->s_srate, + be16_to_cpu(ibhdr->lrh[0]) >> 12); + pbc = ((u64) control << 32) | plen; + piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn); + if (unlikely(piobuf == NULL)) + return no_bufs_available(qp); + + /* + * Write the pbc. + * We have to flush after the PBC for correctness on some cpus + * or WC buffer can be written out of order. + */ + writeq(pbc, piobuf); + piobuf_orig = piobuf; + piobuf += 2; + + flush_wc = dd->flags & QIB_PIO_FLUSH_WC; + if (len == 0) { + /* + * If there is just the header portion, must flush before + * writing last word of header for correctness, and after + * the last header word (trigger word). + */ + if (flush_wc) { + qib_flush_wc(); + qib_pio_copy(piobuf, hdr, hdrwords - 1); + qib_flush_wc(); + __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); + qib_flush_wc(); + } else + qib_pio_copy(piobuf, hdr, hdrwords); + goto done; + } + + if (flush_wc) + qib_flush_wc(); + qib_pio_copy(piobuf, hdr, hdrwords); + piobuf += hdrwords; + + /* The common case is aligned and contained in one segment. */ + if (likely(ss->num_sge == 1 && len <= ss->sge.length && + !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { + u32 *addr = (u32 *) ss->sge.vaddr; + + /* Update address before sending packet. */ + update_sge(ss, len); + if (flush_wc) { + qib_pio_copy(piobuf, addr, dwords - 1); + /* must flush early everything before trigger word */ + qib_flush_wc(); + __raw_writel(addr[dwords - 1], piobuf + dwords - 1); + /* be sure trigger word is written */ + qib_flush_wc(); + } else + qib_pio_copy(piobuf, addr, dwords); + goto done; + } + copy_io(piobuf, ss, len, flush_wc); +done: + if (dd->flags & QIB_USE_SPCL_TRIG) { + u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023; + qib_flush_wc(); + __raw_writel(0xaebecede, piobuf_orig + spcl_off); + } + qib_sendbuf_done(dd, pbufn); + if (qp->s_rdma_mr) { + atomic_dec(&qp->s_rdma_mr->refcount); + qp->s_rdma_mr = NULL; + } + if (qp->s_wqe) { + spin_lock_irqsave(&qp->s_lock, flags); + qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); + spin_unlock_irqrestore(&qp->s_lock, flags); + } else if (qp->ibqp.qp_type == IB_QPT_RC) { + spin_lock_irqsave(&qp->s_lock, flags); + qib_rc_send_complete(qp, ibhdr); + spin_unlock_irqrestore(&qp->s_lock, flags); + } + return 0; +} + +/** + * qib_verbs_send - send a packet + * @qp: the QP to send on + * @hdr: the packet header + * @hdrwords: the number of 32-bit words in the header + * @ss: the SGE to send + * @len: the length of the packet in bytes + * + * Return zero if packet is sent or queued OK. + * Return non-zero and clear qp->s_flags QIB_S_BUSY otherwise. + */ +int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr, + u32 hdrwords, struct qib_sge_state *ss, u32 len) +{ + struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); + u32 plen; + int ret; + u32 dwords = (len + 3) >> 2; + + /* + * Calculate the send buffer trigger address. + * The +1 counts for the pbc control dword following the pbc length. + */ + plen = hdrwords + dwords + 1; + + /* + * VL15 packets (IB_QPT_SMI) will always use PIO, so we + * can defer SDMA restart until link goes ACTIVE without + * worrying about just how we got there. + */ + if (qp->ibqp.qp_type == IB_QPT_SMI || + !(dd->flags & QIB_HAS_SEND_DMA)) + ret = qib_verbs_send_pio(qp, hdr, hdrwords, ss, len, + plen, dwords); + else + ret = qib_verbs_send_dma(qp, hdr, hdrwords, ss, len, + plen, dwords); + + return ret; +} + +int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords, + u64 *rwords, u64 *spkts, u64 *rpkts, + u64 *xmit_wait) +{ + int ret; + struct qib_devdata *dd = ppd->dd; + + if (!(dd->flags & QIB_PRESENT)) { + /* no hardware, freeze, etc. */ + ret = -EINVAL; + goto bail; + } + *swords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDSEND); + *rwords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDRCV); + *spkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTSEND); + *rpkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTRCV); + *xmit_wait = dd->f_portcntr(ppd, QIBPORTCNTR_SENDSTALL); + + ret = 0; + +bail: + return ret; +} + +/** + * qib_get_counters - get various chip counters + * @dd: the qlogic_ib device + * @cntrs: counters are placed here + * + * Return the counters needed by recv_pma_get_portcounters(). + */ +int qib_get_counters(struct qib_pportdata *ppd, + struct qib_verbs_counters *cntrs) +{ + int ret; + + if (!(ppd->dd->flags & QIB_PRESENT)) { + /* no hardware, freeze, etc. */ + ret = -EINVAL; + goto bail; + } + cntrs->symbol_error_counter = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR); + cntrs->link_error_recovery_counter = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKERRRECOV); + /* + * The link downed counter counts when the other side downs the + * connection. We add in the number of times we downed the link + * due to local link integrity errors to compensate. + */ + cntrs->link_downed_counter = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKDOWN); + cntrs->port_rcv_errors = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXDROPPKT) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVOVFL) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERR_RLEN) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_INVALIDRLEN) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLINK) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRICRC) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRVCRC) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLPCRC) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_BADFORMAT); + cntrs->port_rcv_errors += + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXLOCALPHYERR); + cntrs->port_rcv_errors += + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXVLERR); + cntrs->port_rcv_remphys_errors = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVEBP); + cntrs->port_xmit_discards = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_UNSUPVL); + cntrs->port_xmit_data = ppd->dd->f_portcntr(ppd, + QIBPORTCNTR_WORDSEND); + cntrs->port_rcv_data = ppd->dd->f_portcntr(ppd, + QIBPORTCNTR_WORDRCV); + cntrs->port_xmit_packets = ppd->dd->f_portcntr(ppd, + QIBPORTCNTR_PKTSEND); + cntrs->port_rcv_packets = ppd->dd->f_portcntr(ppd, + QIBPORTCNTR_PKTRCV); + cntrs->local_link_integrity_errors = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_LLI); + cntrs->excessive_buffer_overrun_errors = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_EXCESSBUFOVFL); + cntrs->vl15_dropped = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_VL15PKTDROP); + + ret = 0; + +bail: + return ret; +} + +/** + * qib_ib_piobufavail - callback when a PIO buffer is available + * @dd: the device pointer + * + * This is called from qib_intr() at interrupt level when a PIO buffer is + * available after qib_verbs_send() returned an error that no buffers were + * available. Disable the interrupt if there are no more QPs waiting. + */ +void qib_ib_piobufavail(struct qib_devdata *dd) +{ + struct qib_ibdev *dev = &dd->verbs_dev; + struct list_head *list; + struct qib_qp *qps[5]; + struct qib_qp *qp; + unsigned long flags; + unsigned i, n; + + list = &dev->piowait; + n = 0; + + /* + * Note: checking that the piowait list is empty and clearing + * the buffer available interrupt needs to be atomic or we + * could end up with QPs on the wait list with the interrupt + * disabled. + */ + spin_lock_irqsave(&dev->pending_lock, flags); + while (!list_empty(list)) { + if (n == ARRAY_SIZE(qps)) + goto full; + qp = list_entry(list->next, struct qib_qp, iowait); + list_del_init(&qp->iowait); + atomic_inc(&qp->refcount); + qps[n++] = qp; + } + dd->f_wantpiobuf_intr(dd, 0); +full: + spin_unlock_irqrestore(&dev->pending_lock, flags); + + for (i = 0; i < n; i++) { + qp = qps[i]; + + spin_lock_irqsave(&qp->s_lock, flags); + if (qp->s_flags & QIB_S_WAIT_PIO) { + qp->s_flags &= ~QIB_S_WAIT_PIO; + qib_schedule_send(qp); + } + spin_unlock_irqrestore(&qp->s_lock, flags); + + /* Notify qib_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } +} + +static int qib_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct qib_devdata *dd = dd_from_ibdev(ibdev); + struct qib_ibdev *dev = to_idev(ibdev); + + memset(props, 0, sizeof(*props)); + + props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | + IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | + IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; + props->page_size_cap = PAGE_SIZE; + props->vendor_id = + QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3; + props->vendor_part_id = dd->deviceid; + props->hw_ver = dd->minrev; + props->sys_image_guid = ib_qib_sys_image_guid; + props->max_mr_size = ~0ULL; + props->max_qp = ib_qib_max_qps; + props->max_qp_wr = ib_qib_max_qp_wrs; + props->max_sge = ib_qib_max_sges; + props->max_cq = ib_qib_max_cqs; + props->max_ah = ib_qib_max_ahs; + props->max_cqe = ib_qib_max_cqes; + props->max_mr = dev->lk_table.max; + props->max_fmr = dev->lk_table.max; + props->max_map_per_fmr = 32767; + props->max_pd = ib_qib_max_pds; + props->max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC; + props->max_qp_init_rd_atom = 255; + /* props->max_res_rd_atom */ + props->max_srq = ib_qib_max_srqs; + props->max_srq_wr = ib_qib_max_srq_wrs; + props->max_srq_sge = ib_qib_max_srq_sges; + /* props->local_ca_ack_delay */ + props->atomic_cap = IB_ATOMIC_GLOB; + props->max_pkeys = qib_get_npkeys(dd); + props->max_mcast_grp = ib_qib_max_mcast_grps; + props->max_mcast_qp_attach = ib_qib_max_mcast_qp_attached; + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_mcast_grp; + + return 0; +} + +static int qib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct qib_devdata *dd = dd_from_ibdev(ibdev); + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + enum ib_mtu mtu; + u16 lid = ppd->lid; + + memset(props, 0, sizeof(*props)); + props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE); + props->lmc = ppd->lmc; + props->sm_lid = ibp->sm_lid; + props->sm_sl = ibp->sm_sl; + props->state = dd->f_iblink_state(ppd->lastibcstat); + props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat); + props->port_cap_flags = ibp->port_cap_flags; + props->gid_tbl_len = QIB_GUIDS_PER_PORT; + props->max_msg_sz = 0x80000000; + props->pkey_tbl_len = qib_get_npkeys(dd); + props->bad_pkey_cntr = ibp->pkey_violations; + props->qkey_viol_cntr = ibp->qkey_violations; + props->active_width = ppd->link_width_active; + /* See rate_show() */ + props->active_speed = ppd->link_speed_active; + props->max_vl_num = qib_num_vls(ppd->vls_supported); + props->init_type_reply = 0; + + props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096; + switch (ppd->ibmtu) { + case 4096: + mtu = IB_MTU_4096; + break; + case 2048: + mtu = IB_MTU_2048; + break; + case 1024: + mtu = IB_MTU_1024; + break; + case 512: + mtu = IB_MTU_512; + break; + case 256: + mtu = IB_MTU_256; + break; + default: + mtu = IB_MTU_2048; + } + props->active_mtu = mtu; + props->subnet_timeout = ibp->subnet_timeout; + + return 0; +} + +static int qib_modify_device(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify) +{ + struct qib_devdata *dd = dd_from_ibdev(device); + unsigned i; + int ret; + + if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | + IB_DEVICE_MODIFY_NODE_DESC)) { + ret = -EOPNOTSUPP; + goto bail; + } + + if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) { + memcpy(device->node_desc, device_modify->node_desc, 64); + for (i = 0; i < dd->num_pports; i++) { + struct qib_ibport *ibp = &dd->pport[i].ibport_data; + + qib_node_desc_chg(ibp); + } + } + + if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) { + ib_qib_sys_image_guid = + cpu_to_be64(device_modify->sys_image_guid); + for (i = 0; i < dd->num_pports; i++) { + struct qib_ibport *ibp = &dd->pport[i].ibport_data; + + qib_sys_guid_chg(ibp); + } + } + + ret = 0; + +bail: + return ret; +} + +static int qib_modify_port(struct ib_device *ibdev, u8 port, + int port_modify_mask, struct ib_port_modify *props) +{ + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + ibp->port_cap_flags |= props->set_port_cap_mask; + ibp->port_cap_flags &= ~props->clr_port_cap_mask; + if (props->set_port_cap_mask || props->clr_port_cap_mask) + qib_cap_mask_chg(ibp); + if (port_modify_mask & IB_PORT_SHUTDOWN) + qib_set_linkstate(ppd, QIB_IB_LINKDOWN); + if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) + ibp->qkey_violations = 0; + return 0; +} + +static int qib_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + struct qib_devdata *dd = dd_from_ibdev(ibdev); + int ret = 0; + + if (!port || port > dd->num_pports) + ret = -EINVAL; + else { + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + gid->global.subnet_prefix = ibp->gid_prefix; + if (index == 0) + gid->global.interface_id = ppd->guid; + else if (index < QIB_GUIDS_PER_PORT) + gid->global.interface_id = ibp->guids[index - 1]; + else + ret = -EINVAL; + } + + return ret; +} + +static struct ib_pd *qib_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct qib_ibdev *dev = to_idev(ibdev); + struct qib_pd *pd; + struct ib_pd *ret; + + /* + * This is actually totally arbitrary. Some correctness tests + * assume there's a maximum number of PDs that can be allocated. + * We don't actually have this limit, but we fail the test if + * we allow allocations of more than we report for this value. + */ + + pd = kmalloc(sizeof *pd, GFP_KERNEL); + if (!pd) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + spin_lock(&dev->n_pds_lock); + if (dev->n_pds_allocated == ib_qib_max_pds) { + spin_unlock(&dev->n_pds_lock); + kfree(pd); + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + dev->n_pds_allocated++; + spin_unlock(&dev->n_pds_lock); + + /* ib_alloc_pd() will initialize pd->ibpd. */ + pd->user = udata != NULL; + + ret = &pd->ibpd; + +bail: + return ret; +} + +static int qib_dealloc_pd(struct ib_pd *ibpd) +{ + struct qib_pd *pd = to_ipd(ibpd); + struct qib_ibdev *dev = to_idev(ibpd->device); + + spin_lock(&dev->n_pds_lock); + dev->n_pds_allocated--; + spin_unlock(&dev->n_pds_lock); + + kfree(pd); + + return 0; +} + +int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) +{ + /* A multicast address requires a GRH (see ch. 8.4.1). */ + if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE && + ah_attr->dlid != QIB_PERMISSIVE_LID && + !(ah_attr->ah_flags & IB_AH_GRH)) + goto bail; + if ((ah_attr->ah_flags & IB_AH_GRH) && + ah_attr->grh.sgid_index >= QIB_GUIDS_PER_PORT) + goto bail; + if (ah_attr->dlid == 0) + goto bail; + if (ah_attr->port_num < 1 || + ah_attr->port_num > ibdev->phys_port_cnt) + goto bail; + if (ah_attr->static_rate != IB_RATE_PORT_CURRENT && + ib_rate_to_mult(ah_attr->static_rate) < 0) + goto bail; + if (ah_attr->sl > 15) + goto bail; + return 0; +bail: + return -EINVAL; +} + +/** + * qib_create_ah - create an address handle + * @pd: the protection domain + * @ah_attr: the attributes of the AH + * + * This may be called from interrupt context. + */ +static struct ib_ah *qib_create_ah(struct ib_pd *pd, + struct ib_ah_attr *ah_attr) +{ + struct qib_ah *ah; + struct ib_ah *ret; + struct qib_ibdev *dev = to_idev(pd->device); + unsigned long flags; + + if (qib_check_ah(pd->device, ah_attr)) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + + ah = kmalloc(sizeof *ah, GFP_ATOMIC); + if (!ah) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + spin_lock_irqsave(&dev->n_ahs_lock, flags); + if (dev->n_ahs_allocated == ib_qib_max_ahs) { + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + kfree(ah); + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + dev->n_ahs_allocated++; + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + + /* ib_create_ah() will initialize ah->ibah. */ + ah->attr = *ah_attr; + atomic_set(&ah->refcount, 0); + + ret = &ah->ibah; + +bail: + return ret; +} + +/** + * qib_destroy_ah - destroy an address handle + * @ibah: the AH to destroy + * + * This may be called from interrupt context. + */ +static int qib_destroy_ah(struct ib_ah *ibah) +{ + struct qib_ibdev *dev = to_idev(ibah->device); + struct qib_ah *ah = to_iah(ibah); + unsigned long flags; + + if (atomic_read(&ah->refcount) != 0) + return -EBUSY; + + spin_lock_irqsave(&dev->n_ahs_lock, flags); + dev->n_ahs_allocated--; + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + + kfree(ah); + + return 0; +} + +static int qib_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) +{ + struct qib_ah *ah = to_iah(ibah); + + if (qib_check_ah(ibah->device, ah_attr)) + return -EINVAL; + + ah->attr = *ah_attr; + + return 0; +} + +static int qib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) +{ + struct qib_ah *ah = to_iah(ibah); + + *ah_attr = ah->attr; + + return 0; +} + +/** + * qib_get_npkeys - return the size of the PKEY table for context 0 + * @dd: the qlogic_ib device + */ +unsigned qib_get_npkeys(struct qib_devdata *dd) +{ + return ARRAY_SIZE(dd->rcd[0]->pkeys); +} + +/* + * Return the indexed PKEY from the port PKEY table. + * No need to validate rcd[ctxt]; the port is setup if we are here. + */ +unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index) +{ + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_devdata *dd = ppd->dd; + unsigned ctxt = ppd->hw_pidx; + unsigned ret; + + /* dd->rcd null if mini_init or some init failures */ + if (!dd->rcd || index >= ARRAY_SIZE(dd->rcd[ctxt]->pkeys)) + ret = 0; + else + ret = dd->rcd[ctxt]->pkeys[index]; + + return ret; +} + +static int qib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + struct qib_devdata *dd = dd_from_ibdev(ibdev); + int ret; + + if (index >= qib_get_npkeys(dd)) { + ret = -EINVAL; + goto bail; + } + + *pkey = qib_get_pkey(to_iport(ibdev, port), index); + ret = 0; + +bail: + return ret; +} + +/** + * qib_alloc_ucontext - allocate a ucontest + * @ibdev: the infiniband device + * @udata: not used by the QLogic_IB driver + */ + +static struct ib_ucontext *qib_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct qib_ucontext *context; + struct ib_ucontext *ret; + + context = kmalloc(sizeof *context, GFP_KERNEL); + if (!context) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + ret = &context->ibucontext; + +bail: + return ret; +} + +static int qib_dealloc_ucontext(struct ib_ucontext *context) +{ + kfree(to_iucontext(context)); + return 0; +} + +static void init_ibport(struct qib_pportdata *ppd) +{ + struct qib_verbs_counters cntrs; + struct qib_ibport *ibp = &ppd->ibport_data; + + spin_lock_init(&ibp->lock); + /* Set the prefix to the default value (see ch. 4.1.1) */ + ibp->gid_prefix = IB_DEFAULT_GID_PREFIX; + ibp->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE); + ibp->port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP | + IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP | + IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP | + IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP | + IB_PORT_OTHER_LOCAL_CHANGES_SUP; + if (ppd->dd->flags & QIB_HAS_LINK_LATENCY) + ibp->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP; + ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; + ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; + ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; + ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; + ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; + + /* Snapshot current HW counters to "clear" them. */ + qib_get_counters(ppd, &cntrs); + ibp->z_symbol_error_counter = cntrs.symbol_error_counter; + ibp->z_link_error_recovery_counter = + cntrs.link_error_recovery_counter; + ibp->z_link_downed_counter = cntrs.link_downed_counter; + ibp->z_port_rcv_errors = cntrs.port_rcv_errors; + ibp->z_port_rcv_remphys_errors = cntrs.port_rcv_remphys_errors; + ibp->z_port_xmit_discards = cntrs.port_xmit_discards; + ibp->z_port_xmit_data = cntrs.port_xmit_data; + ibp->z_port_rcv_data = cntrs.port_rcv_data; + ibp->z_port_xmit_packets = cntrs.port_xmit_packets; + ibp->z_port_rcv_packets = cntrs.port_rcv_packets; + ibp->z_local_link_integrity_errors = + cntrs.local_link_integrity_errors; + ibp->z_excessive_buffer_overrun_errors = + cntrs.excessive_buffer_overrun_errors; + ibp->z_vl15_dropped = cntrs.vl15_dropped; + RCU_INIT_POINTER(ibp->qp0, NULL); + RCU_INIT_POINTER(ibp->qp1, NULL); +} + +/** + * qib_register_ib_device - register our device with the infiniband core + * @dd: the device data structure + * Return the allocated qib_ibdev pointer or NULL on error. + */ +int qib_register_ib_device(struct qib_devdata *dd) +{ + struct qib_ibdev *dev = &dd->verbs_dev; + struct ib_device *ibdev = &dev->ibdev; + struct qib_pportdata *ppd = dd->pport; + unsigned i, lk_tab_size; + int ret; + + dev->qp_table_size = ib_qib_qp_table_size; + get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd)); + dev->qp_table = kmalloc(dev->qp_table_size * sizeof *dev->qp_table, + GFP_KERNEL); + if (!dev->qp_table) { + ret = -ENOMEM; + goto err_qpt; + } + for (i = 0; i < dev->qp_table_size; i++) + RCU_INIT_POINTER(dev->qp_table[i], NULL); + + for (i = 0; i < dd->num_pports; i++) + init_ibport(ppd + i); + + /* Only need to initialize non-zero fields. */ + spin_lock_init(&dev->qpt_lock); + spin_lock_init(&dev->n_pds_lock); + spin_lock_init(&dev->n_ahs_lock); + spin_lock_init(&dev->n_cqs_lock); + spin_lock_init(&dev->n_qps_lock); + spin_lock_init(&dev->n_srqs_lock); + spin_lock_init(&dev->n_mcast_grps_lock); + init_timer(&dev->mem_timer); + dev->mem_timer.function = mem_timer; + dev->mem_timer.data = (unsigned long) dev; + + qib_init_qpn_table(dd, &dev->qpn_table); + + /* + * The top ib_qib_lkey_table_size bits are used to index the + * table. The lower 8 bits can be owned by the user (copied from + * the LKEY). The remaining bits act as a generation number or tag. + */ + spin_lock_init(&dev->lk_table.lock); + dev->lk_table.max = 1 << ib_qib_lkey_table_size; + lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); + dev->lk_table.table = (struct qib_mregion **) + __get_free_pages(GFP_KERNEL, get_order(lk_tab_size)); + if (dev->lk_table.table == NULL) { + ret = -ENOMEM; + goto err_lk; + } + memset(dev->lk_table.table, 0, lk_tab_size); + INIT_LIST_HEAD(&dev->pending_mmaps); + spin_lock_init(&dev->pending_lock); + dev->mmap_offset = PAGE_SIZE; + spin_lock_init(&dev->mmap_offset_lock); + INIT_LIST_HEAD(&dev->piowait); + INIT_LIST_HEAD(&dev->dmawait); + INIT_LIST_HEAD(&dev->txwait); + INIT_LIST_HEAD(&dev->memwait); + INIT_LIST_HEAD(&dev->txreq_free); + + if (ppd->sdma_descq_cnt) { + dev->pio_hdrs = dma_alloc_coherent(&dd->pcidev->dev, + ppd->sdma_descq_cnt * + sizeof(struct qib_pio_header), + &dev->pio_hdrs_phys, + GFP_KERNEL); + if (!dev->pio_hdrs) { + ret = -ENOMEM; + goto err_hdrs; + } + } + + for (i = 0; i < ppd->sdma_descq_cnt; i++) { + struct qib_verbs_txreq *tx; + + tx = kzalloc(sizeof *tx, GFP_KERNEL); + if (!tx) { + ret = -ENOMEM; + goto err_tx; + } + tx->hdr_inx = i; + list_add(&tx->txreq.list, &dev->txreq_free); + } + + /* + * The system image GUID is supposed to be the same for all + * IB HCAs in a single system but since there can be other + * device types in the system, we can't be sure this is unique. + */ + if (!ib_qib_sys_image_guid) + ib_qib_sys_image_guid = ppd->guid; + + strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX); + ibdev->owner = THIS_MODULE; + ibdev->node_guid = ppd->guid; + ibdev->uverbs_abi_ver = QIB_UVERBS_ABI_VERSION; + ibdev->uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_CREATE_AH) | + (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | + (1ull << IB_USER_VERBS_CMD_QUERY_AH) | + (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); + ibdev->node_type = RDMA_NODE_IB_CA; + ibdev->phys_port_cnt = dd->num_pports; + ibdev->num_comp_vectors = 1; + ibdev->dma_device = &dd->pcidev->dev; + ibdev->query_device = qib_query_device; + ibdev->modify_device = qib_modify_device; + ibdev->query_port = qib_query_port; + ibdev->modify_port = qib_modify_port; + ibdev->query_pkey = qib_query_pkey; + ibdev->query_gid = qib_query_gid; + ibdev->alloc_ucontext = qib_alloc_ucontext; + ibdev->dealloc_ucontext = qib_dealloc_ucontext; + ibdev->alloc_pd = qib_alloc_pd; + ibdev->dealloc_pd = qib_dealloc_pd; + ibdev->create_ah = qib_create_ah; + ibdev->destroy_ah = qib_destroy_ah; + ibdev->modify_ah = qib_modify_ah; + ibdev->query_ah = qib_query_ah; + ibdev->create_srq = qib_create_srq; + ibdev->modify_srq = qib_modify_srq; + ibdev->query_srq = qib_query_srq; + ibdev->destroy_srq = qib_destroy_srq; + ibdev->create_qp = qib_create_qp; + ibdev->modify_qp = qib_modify_qp; + ibdev->query_qp = qib_query_qp; + ibdev->destroy_qp = qib_destroy_qp; + ibdev->post_send = qib_post_send; + ibdev->post_recv = qib_post_receive; + ibdev->post_srq_recv = qib_post_srq_receive; + ibdev->create_cq = qib_create_cq; + ibdev->destroy_cq = qib_destroy_cq; + ibdev->resize_cq = qib_resize_cq; + ibdev->poll_cq = qib_poll_cq; + ibdev->req_notify_cq = qib_req_notify_cq; + ibdev->get_dma_mr = qib_get_dma_mr; + ibdev->reg_phys_mr = qib_reg_phys_mr; + ibdev->reg_user_mr = qib_reg_user_mr; + ibdev->dereg_mr = qib_dereg_mr; + ibdev->alloc_fast_reg_mr = qib_alloc_fast_reg_mr; + ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list; + ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list; + ibdev->alloc_fmr = qib_alloc_fmr; + ibdev->map_phys_fmr = qib_map_phys_fmr; + ibdev->unmap_fmr = qib_unmap_fmr; + ibdev->dealloc_fmr = qib_dealloc_fmr; + ibdev->attach_mcast = qib_multicast_attach; + ibdev->detach_mcast = qib_multicast_detach; + ibdev->process_mad = qib_process_mad; + ibdev->mmap = qib_mmap; + ibdev->dma_ops = &qib_dma_mapping_ops; + + snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), + QIB_IDSTR " %s", init_utsname()->nodename); + + ret = ib_register_device(ibdev, qib_create_port_files); + if (ret) + goto err_reg; + + ret = qib_create_agents(dev); + if (ret) + goto err_agents; + + if (qib_verbs_register_sysfs(dd)) + goto err_class; + + goto bail; + +err_class: + qib_free_agents(dev); +err_agents: + ib_unregister_device(ibdev); +err_reg: +err_tx: + while (!list_empty(&dev->txreq_free)) { + struct list_head *l = dev->txreq_free.next; + struct qib_verbs_txreq *tx; + + list_del(l); + tx = list_entry(l, struct qib_verbs_txreq, txreq.list); + kfree(tx); + } + if (ppd->sdma_descq_cnt) + dma_free_coherent(&dd->pcidev->dev, + ppd->sdma_descq_cnt * + sizeof(struct qib_pio_header), + dev->pio_hdrs, dev->pio_hdrs_phys); +err_hdrs: + free_pages((unsigned long) dev->lk_table.table, get_order(lk_tab_size)); +err_lk: + kfree(dev->qp_table); +err_qpt: + qib_dev_err(dd, "cannot register verbs: %d!\n", -ret); +bail: + return ret; +} + +void qib_unregister_ib_device(struct qib_devdata *dd) +{ + struct qib_ibdev *dev = &dd->verbs_dev; + struct ib_device *ibdev = &dev->ibdev; + u32 qps_inuse; + unsigned lk_tab_size; + + qib_verbs_unregister_sysfs(dd); + + qib_free_agents(dev); + + ib_unregister_device(ibdev); + + if (!list_empty(&dev->piowait)) + qib_dev_err(dd, "piowait list not empty!\n"); + if (!list_empty(&dev->dmawait)) + qib_dev_err(dd, "dmawait list not empty!\n"); + if (!list_empty(&dev->txwait)) + qib_dev_err(dd, "txwait list not empty!\n"); + if (!list_empty(&dev->memwait)) + qib_dev_err(dd, "memwait list not empty!\n"); + if (dev->dma_mr) + qib_dev_err(dd, "DMA MR not NULL!\n"); + + qps_inuse = qib_free_all_qps(dd); + if (qps_inuse) + qib_dev_err(dd, "QP memory leak! %u still in use\n", + qps_inuse); + + del_timer_sync(&dev->mem_timer); + qib_free_qpn_table(&dev->qpn_table); + while (!list_empty(&dev->txreq_free)) { + struct list_head *l = dev->txreq_free.next; + struct qib_verbs_txreq *tx; + + list_del(l); + tx = list_entry(l, struct qib_verbs_txreq, txreq.list); + kfree(tx); + } + if (dd->pport->sdma_descq_cnt) + dma_free_coherent(&dd->pcidev->dev, + dd->pport->sdma_descq_cnt * + sizeof(struct qib_pio_header), + dev->pio_hdrs, dev->pio_hdrs_phys); + lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); + free_pages((unsigned long) dev->lk_table.table, + get_order(lk_tab_size)); + kfree(dev->qp_table); +} diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h new file mode 100644 index 00000000..0c19ef0c --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -0,0 +1,1097 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef QIB_VERBS_H +#define QIB_VERBS_H + +#include +#include +#include +#include +#include +#include +#include +#include + +struct qib_ctxtdata; +struct qib_pportdata; +struct qib_devdata; +struct qib_verbs_txreq; + +#define QIB_MAX_RDMA_ATOMIC 16 +#define QIB_GUIDS_PER_PORT 5 + +#define QPN_MAX (1 << 24) +#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define QIB_UVERBS_ABI_VERSION 2 + +/* + * Define an ib_cq_notify value that is not valid so we know when CQ + * notifications are armed. + */ +#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1) + +#define IB_SEQ_NAK (3 << 29) + +/* AETH NAK opcode values */ +#define IB_RNR_NAK 0x20 +#define IB_NAK_PSN_ERROR 0x60 +#define IB_NAK_INVALID_REQUEST 0x61 +#define IB_NAK_REMOTE_ACCESS_ERROR 0x62 +#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 +#define IB_NAK_INVALID_RD_REQUEST 0x64 + +/* Flags for checking QP state (see ib_qib_state_ops[]) */ +#define QIB_POST_SEND_OK 0x01 +#define QIB_POST_RECV_OK 0x02 +#define QIB_PROCESS_RECV_OK 0x04 +#define QIB_PROCESS_SEND_OK 0x08 +#define QIB_PROCESS_NEXT_SEND_OK 0x10 +#define QIB_FLUSH_SEND 0x20 +#define QIB_FLUSH_RECV 0x40 +#define QIB_PROCESS_OR_FLUSH_SEND \ + (QIB_PROCESS_SEND_OK | QIB_FLUSH_SEND) + +/* IB Performance Manager status values */ +#define IB_PMA_SAMPLE_STATUS_DONE 0x00 +#define IB_PMA_SAMPLE_STATUS_STARTED 0x01 +#define IB_PMA_SAMPLE_STATUS_RUNNING 0x02 + +/* Mandatory IB performance counter select values. */ +#define IB_PMA_PORT_XMIT_DATA cpu_to_be16(0x0001) +#define IB_PMA_PORT_RCV_DATA cpu_to_be16(0x0002) +#define IB_PMA_PORT_XMIT_PKTS cpu_to_be16(0x0003) +#define IB_PMA_PORT_RCV_PKTS cpu_to_be16(0x0004) +#define IB_PMA_PORT_XMIT_WAIT cpu_to_be16(0x0005) + +#define QIB_VENDOR_IPG cpu_to_be16(0xFFA0) + +#define IB_BTH_REQ_ACK (1 << 31) +#define IB_BTH_SOLICITED (1 << 23) +#define IB_BTH_MIG_REQ (1 << 22) + +/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */ +#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26) + +#define IB_GRH_VERSION 6 +#define IB_GRH_VERSION_MASK 0xF +#define IB_GRH_VERSION_SHIFT 28 +#define IB_GRH_TCLASS_MASK 0xFF +#define IB_GRH_TCLASS_SHIFT 20 +#define IB_GRH_FLOW_MASK 0xFFFFF +#define IB_GRH_FLOW_SHIFT 0 +#define IB_GRH_NEXT_HDR 0x1B + +#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) + +/* Values for set/get portinfo VLCap OperationalVLs */ +#define IB_VL_VL0 1 +#define IB_VL_VL0_1 2 +#define IB_VL_VL0_3 3 +#define IB_VL_VL0_7 4 +#define IB_VL_VL0_14 5 + +static inline int qib_num_vls(int vls) +{ + switch (vls) { + default: + case IB_VL_VL0: + return 1; + case IB_VL_VL0_1: + return 2; + case IB_VL_VL0_3: + return 4; + case IB_VL_VL0_7: + return 8; + case IB_VL_VL0_14: + return 15; + } +} + +struct ib_reth { + __be64 vaddr; + __be32 rkey; + __be32 length; +} __attribute__ ((packed)); + +struct ib_atomic_eth { + __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ + __be32 rkey; + __be64 swap_data; + __be64 compare_data; +} __attribute__ ((packed)); + +struct qib_other_headers { + __be32 bth[3]; + union { + struct { + __be32 deth[2]; + __be32 imm_data; + } ud; + struct { + struct ib_reth reth; + __be32 imm_data; + } rc; + struct { + __be32 aeth; + __be32 atomic_ack_eth[2]; + } at; + __be32 imm_data; + __be32 aeth; + struct ib_atomic_eth atomic_eth; + } u; +} __attribute__ ((packed)); + +/* + * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes + * long (72 w/ imm_data). Only the first 56 bytes of the IB header + * will be in the eager header buffer. The remaining 12 or 16 bytes + * are in the data buffer. + */ +struct qib_ib_header { + __be16 lrh[4]; + union { + struct { + struct ib_grh grh; + struct qib_other_headers oth; + } l; + struct qib_other_headers oth; + } u; +} __attribute__ ((packed)); + +struct qib_pio_header { + __le32 pbc[2]; + struct qib_ib_header hdr; +} __attribute__ ((packed)); + +/* + * There is one struct qib_mcast for each multicast GID. + * All attached QPs are then stored as a list of + * struct qib_mcast_qp. + */ +struct qib_mcast_qp { + struct list_head list; + struct qib_qp *qp; +}; + +struct qib_mcast { + struct rb_node rb_node; + union ib_gid mgid; + struct list_head qp_list; + wait_queue_head_t wait; + atomic_t refcount; + int n_attached; +}; + +/* Protection domain */ +struct qib_pd { + struct ib_pd ibpd; + int user; /* non-zero if created from user space */ +}; + +/* Address Handle */ +struct qib_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; + atomic_t refcount; +}; + +/* + * This structure is used by qib_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. + */ +struct qib_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + void *obj; + __u64 offset; + struct kref ref; + unsigned size; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and completion queue entries as a single memory allocation so + * it can be mmap'ed into user space. + */ +struct qib_cq_wc { + u32 head; /* index of next entry to fill */ + u32 tail; /* index of next ib_poll_cq() entry */ + union { + /* these are actually size ibcq.cqe + 1 */ + struct ib_uverbs_wc uqueue[0]; + struct ib_wc kqueue[0]; + }; +}; + +/* + * The completion queue structure. + */ +struct qib_cq { + struct ib_cq ibcq; + struct work_struct comptask; + spinlock_t lock; /* protect changes in this struct */ + u8 notify; + u8 triggered; + struct qib_cq_wc *queue; + struct qib_mmap_info *ip; +}; + +/* + * A segment is a linear region of low physical memory. + * XXX Maybe we should use phys addr here and kmap()/kunmap(). + * Used by the verbs layer. + */ +struct qib_seg { + void *vaddr; + size_t length; +}; + +/* The number of qib_segs that fit in a page. */ +#define QIB_SEGSZ (PAGE_SIZE / sizeof(struct qib_seg)) + +struct qib_segarray { + struct qib_seg segs[QIB_SEGSZ]; +}; + +struct qib_mregion { + struct ib_pd *pd; /* shares refcnt of ibmr.pd */ + u64 user_base; /* User's address for this region */ + u64 iova; /* IB start address of this region */ + size_t length; + u32 lkey; + u32 offset; /* offset (bytes) to start of region */ + int access_flags; + u32 max_segs; /* number of qib_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + u8 page_shift; /* 0 - non unform/non powerof2 sizes */ + atomic_t refcount; + struct qib_segarray *map[0]; /* the segments */ +}; + +/* + * These keep track of the copy progress within a memory region. + * Used by the verbs layer. + */ +struct qib_sge { + struct qib_mregion *mr; + void *vaddr; /* kernel virtual address of segment */ + u32 sge_length; /* length of the SGE */ + u32 length; /* remaining length of the segment */ + u16 m; /* current index: mr->map[m] */ + u16 n; /* current index: mr->map[m]->segs[n] */ +}; + +/* Memory region */ +struct qib_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + struct qib_mregion mr; /* must be last */ +}; + +/* + * Send work request queue entry. + * The size of the sg_list is determined when the QP is created and stored + * in qp->s_max_sge. + */ +struct qib_swqe { + struct ib_send_wr wr; /* don't use wr.sg_list */ + u32 psn; /* first packet sequence number */ + u32 lpsn; /* last packet sequence number */ + u32 ssn; /* send sequence number */ + u32 length; /* total length of data in sg_list */ + struct qib_sge sg_list[0]; +}; + +/* + * Receive work request queue entry. + * The size of the sg_list is determined when the QP (or SRQ) is created + * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). + */ +struct qib_rwqe { + u64 wr_id; + u8 num_sge; + struct ib_sge sg_list[0]; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and receive work queue entries as a single memory allocation so + * it can be mmap'ed into user space. + * Note that the wq array elements are variable size so you can't + * just index into the array to get the N'th element; + * use get_rwqe_ptr() instead. + */ +struct qib_rwq { + u32 head; /* new work requests posted to the head */ + u32 tail; /* receives pull requests from here. */ + struct qib_rwqe wq[0]; +}; + +struct qib_rq { + struct qib_rwq *wq; + spinlock_t lock; /* protect changes in this struct */ + u32 size; /* size of RWQE array */ + u8 max_sge; +}; + +struct qib_srq { + struct ib_srq ibsrq; + struct qib_rq rq; + struct qib_mmap_info *ip; + /* send signal when number of RWQEs < limit */ + u32 limit; +}; + +struct qib_sge_state { + struct qib_sge *sg_list; /* next SGE to be used if any */ + struct qib_sge sge; /* progress state for the current SGE */ + u32 total_len; + u8 num_sge; +}; + +/* + * This structure holds the information that the send tasklet needs + * to send a RDMA read response or atomic operation. + */ +struct qib_ack_entry { + u8 opcode; + u8 sent; + u32 psn; + u32 lpsn; + union { + struct qib_sge rdma_sge; + u64 atomic_data; + }; +}; + +/* + * Variables prefixed with s_ are for the requester (sender). + * Variables prefixed with r_ are for the responder (receiver). + * Variables prefixed with ack_ are for responder replies. + * + * Common variables are protected by both r_rq.lock and s_lock in that order + * which only happens in modify_qp() or changing the QP 'state'. + */ +struct qib_qp { + struct ib_qp ibqp; + struct qib_qp *next; /* link list for QPN hash table */ + struct qib_qp *timer_next; /* link list for qib_ib_timer() */ + struct list_head iowait; /* link for wait PIO buf */ + struct list_head rspwait; /* link for waititing to respond */ + struct ib_ah_attr remote_ah_attr; + struct ib_ah_attr alt_ah_attr; + struct qib_ib_header s_hdr; /* next packet header to send */ + atomic_t refcount; + wait_queue_head_t wait; + wait_queue_head_t wait_dma; + struct timer_list s_timer; + struct work_struct s_work; + struct qib_mmap_info *ip; + struct qib_sge_state *s_cur_sge; + struct qib_verbs_txreq *s_tx; + struct qib_mregion *s_rdma_mr; + struct qib_sge_state s_sge; /* current send request data */ + struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1]; + struct qib_sge_state s_ack_rdma_sge; + struct qib_sge_state s_rdma_read_sge; + struct qib_sge_state r_sge; /* current receive data */ + spinlock_t r_lock; /* used for APM */ + spinlock_t s_lock; + atomic_t s_dma_busy; + u32 s_flags; + u32 s_cur_size; /* size of send packet in bytes */ + u32 s_len; /* total length of s_sge */ + u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ + u32 s_next_psn; /* PSN for next request */ + u32 s_last_psn; /* last response PSN processed */ + u32 s_sending_psn; /* lowest PSN that is being sent */ + u32 s_sending_hpsn; /* highest PSN that is being sent */ + u32 s_psn; /* current packet sequence number */ + u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ + u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ + u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ + u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ + u64 r_wr_id; /* ID for current receive WQE */ + unsigned long r_aflags; + u32 r_len; /* total length of r_sge */ + u32 r_rcv_len; /* receive data len processed */ + u32 r_psn; /* expected rcv packet sequence number */ + u32 r_msn; /* message sequence number */ + u16 s_hdrwords; /* size of s_hdr in 32 bit words */ + u16 s_rdma_ack_cnt; + u8 state; /* QP state */ + u8 s_state; /* opcode of last packet sent */ + u8 s_ack_state; /* opcode of packet to ACK */ + u8 s_nak_state; /* non-zero if NAK is pending */ + u8 r_state; /* opcode of last packet received */ + u8 r_nak_state; /* non-zero if NAK is pending */ + u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ + u8 r_flags; + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 r_head_ack_queue; /* index into s_ack_queue[] */ + u8 qp_access_flags; + u8 s_max_sge; /* size of s_wq->sg_list */ + u8 s_retry_cnt; /* number of times to retry */ + u8 s_rnr_retry_cnt; + u8 s_retry; /* requester retry counter */ + u8 s_rnr_retry; /* requester RNR retry counter */ + u8 s_pkey_index; /* PKEY index to use */ + u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ + u8 s_tail_ack_queue; /* index into s_ack_queue[] */ + u8 s_srate; + u8 s_draining; + u8 s_mig_state; + u8 timeout; /* Timeout for this QP */ + u8 alt_timeout; /* Alternate path timeout for this QP */ + u8 port_num; + enum ib_mtu path_mtu; + u32 pmtu; /* decoded from path_mtu */ + u32 remote_qpn; + u32 qkey; /* QKEY for this QP (for UD or RD) */ + u32 s_size; /* send work queue size */ + u32 s_head; /* new entries added here */ + u32 s_tail; /* next entry to process */ + u32 s_cur; /* current work queue entry */ + u32 s_acked; /* last un-ACK'ed entry */ + u32 s_last; /* last completed entry */ + u32 s_ssn; /* SSN of tail entry */ + u32 s_lsn; /* limit sequence number (credit) */ + unsigned long timeout_jiffies; /* computed from timeout */ + struct qib_swqe *s_wq; /* send work queue */ + struct qib_swqe *s_wqe; + struct qib_rq r_rq; /* receive work queue */ + struct qib_sge r_sg_list[0]; /* verified SGEs */ +}; + +/* + * Atomic bit definitions for r_aflags. + */ +#define QIB_R_WRID_VALID 0 +#define QIB_R_REWIND_SGE 1 + +/* + * Bit definitions for r_flags. + */ +#define QIB_R_REUSE_SGE 0x01 +#define QIB_R_RDMAR_SEQ 0x02 +#define QIB_R_RSP_NAK 0x04 +#define QIB_R_RSP_SEND 0x08 +#define QIB_R_COMM_EST 0x10 + +/* + * Bit definitions for s_flags. + * + * QIB_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled + * QIB_S_BUSY - send tasklet is processing the QP + * QIB_S_TIMER - the RC retry timer is active + * QIB_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics + * QIB_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs + * before processing the next SWQE + * QIB_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete + * before processing the next SWQE + * QIB_S_WAIT_RNR - waiting for RNR timeout + * QIB_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE + * QIB_S_WAIT_DMA - waiting for send DMA queue to drain before generating + * next send completion entry not via send DMA + * QIB_S_WAIT_PIO - waiting for a send buffer to be available + * QIB_S_WAIT_TX - waiting for a struct qib_verbs_txreq to be available + * QIB_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available + * QIB_S_WAIT_KMEM - waiting for kernel memory to be available + * QIB_S_WAIT_PSN - waiting for a packet to exit the send DMA queue + * QIB_S_WAIT_ACK - waiting for an ACK packet before sending more requests + * QIB_S_SEND_ONE - send one packet, request ACK, then wait for ACK + */ +#define QIB_S_SIGNAL_REQ_WR 0x0001 +#define QIB_S_BUSY 0x0002 +#define QIB_S_TIMER 0x0004 +#define QIB_S_RESP_PENDING 0x0008 +#define QIB_S_ACK_PENDING 0x0010 +#define QIB_S_WAIT_FENCE 0x0020 +#define QIB_S_WAIT_RDMAR 0x0040 +#define QIB_S_WAIT_RNR 0x0080 +#define QIB_S_WAIT_SSN_CREDIT 0x0100 +#define QIB_S_WAIT_DMA 0x0200 +#define QIB_S_WAIT_PIO 0x0400 +#define QIB_S_WAIT_TX 0x0800 +#define QIB_S_WAIT_DMA_DESC 0x1000 +#define QIB_S_WAIT_KMEM 0x2000 +#define QIB_S_WAIT_PSN 0x4000 +#define QIB_S_WAIT_ACK 0x8000 +#define QIB_S_SEND_ONE 0x10000 +#define QIB_S_UNLIMITED_CREDIT 0x20000 + +/* + * Wait flags that would prevent any packet type from being sent. + */ +#define QIB_S_ANY_WAIT_IO (QIB_S_WAIT_PIO | QIB_S_WAIT_TX | \ + QIB_S_WAIT_DMA_DESC | QIB_S_WAIT_KMEM) + +/* + * Wait flags that would prevent send work requests from making progress. + */ +#define QIB_S_ANY_WAIT_SEND (QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR | \ + QIB_S_WAIT_RNR | QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_DMA | \ + QIB_S_WAIT_PSN | QIB_S_WAIT_ACK) + +#define QIB_S_ANY_WAIT (QIB_S_ANY_WAIT_IO | QIB_S_ANY_WAIT_SEND) + +#define QIB_PSN_CREDIT 16 + +/* + * Since struct qib_swqe is not a fixed size, we can't simply index into + * struct qib_qp.s_wq. This function does the array index computation. + */ +static inline struct qib_swqe *get_swqe_ptr(struct qib_qp *qp, + unsigned n) +{ + return (struct qib_swqe *)((char *)qp->s_wq + + (sizeof(struct qib_swqe) + + qp->s_max_sge * + sizeof(struct qib_sge)) * n); +} + +/* + * Since struct qib_rwqe is not a fixed size, we can't simply index into + * struct qib_rwq.wq. This function does the array index computation. + */ +static inline struct qib_rwqe *get_rwqe_ptr(struct qib_rq *rq, unsigned n) +{ + return (struct qib_rwqe *) + ((char *) rq->wq->wq + + (sizeof(struct qib_rwqe) + + rq->max_sge * sizeof(struct ib_sge)) * n); +} + +/* + * QPN-map pages start out as NULL, they get allocated upon + * first use and are never deallocated. This way, + * large bitmaps are not allocated unless large numbers of QPs are used. + */ +struct qpn_map { + void *page; +}; + +struct qib_qpn_table { + spinlock_t lock; /* protect changes in this struct */ + unsigned flags; /* flags for QP0/1 allocated for each port */ + u32 last; /* last QP number allocated */ + u32 nmaps; /* size of the map table */ + u16 limit; + u16 mask; + /* bit map of free QP numbers other than 0/1 */ + struct qpn_map map[QPNMAP_ENTRIES]; +}; + +struct qib_lkey_table { + spinlock_t lock; /* protect changes in this struct */ + u32 next; /* next unused index (speeds search) */ + u32 gen; /* generation count */ + u32 max; /* size of the table */ + struct qib_mregion **table; +}; + +struct qib_opcode_stats { + u64 n_packets; /* number of packets */ + u64 n_bytes; /* total number of bytes */ +}; + +struct qib_ibport { + struct qib_qp *qp0; + struct qib_qp *qp1; + struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ + struct qib_ah *sm_ah; + struct qib_ah *smi_ah; + struct rb_root mcast_tree; + spinlock_t lock; /* protect changes in this struct */ + + /* non-zero when timer is set */ + unsigned long mkey_lease_timeout; + unsigned long trap_timeout; + __be64 gid_prefix; /* in network order */ + __be64 mkey; + __be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */ + u64 tid; /* TID for traps */ + u64 n_unicast_xmit; /* total unicast packets sent */ + u64 n_unicast_rcv; /* total unicast packets received */ + u64 n_multicast_xmit; /* total multicast packets sent */ + u64 n_multicast_rcv; /* total multicast packets received */ + u64 z_symbol_error_counter; /* starting count for PMA */ + u64 z_link_error_recovery_counter; /* starting count for PMA */ + u64 z_link_downed_counter; /* starting count for PMA */ + u64 z_port_rcv_errors; /* starting count for PMA */ + u64 z_port_rcv_remphys_errors; /* starting count for PMA */ + u64 z_port_xmit_discards; /* starting count for PMA */ + u64 z_port_xmit_data; /* starting count for PMA */ + u64 z_port_rcv_data; /* starting count for PMA */ + u64 z_port_xmit_packets; /* starting count for PMA */ + u64 z_port_rcv_packets; /* starting count for PMA */ + u32 z_local_link_integrity_errors; /* starting count for PMA */ + u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */ + u32 z_vl15_dropped; /* starting count for PMA */ + u32 n_rc_resends; + u32 n_rc_acks; + u32 n_rc_qacks; + u32 n_rc_delayed_comp; + u32 n_seq_naks; + u32 n_rdma_seq; + u32 n_rnr_naks; + u32 n_other_naks; + u32 n_loop_pkts; + u32 n_pkt_drops; + u32 n_vl15_dropped; + u32 n_rc_timeouts; + u32 n_dmawait; + u32 n_unaligned; + u32 n_rc_dupreq; + u32 n_rc_seqnak; + u32 port_cap_flags; + u32 pma_sample_start; + u32 pma_sample_interval; + __be16 pma_counter_select[5]; + u16 pma_tag; + u16 pkey_violations; + u16 qkey_violations; + u16 mkey_violations; + u16 mkey_lease_period; + u16 sm_lid; + u16 repress_traps; + u8 sm_sl; + u8 mkeyprot; + u8 subnet_timeout; + u8 vl_high_limit; + u8 sl_to_vl[16]; + + struct qib_opcode_stats opstats[128]; +}; + +struct qib_ibdev { + struct ib_device ibdev; + struct list_head pending_mmaps; + spinlock_t mmap_offset_lock; /* protect mmap_offset */ + u32 mmap_offset; + struct qib_mregion *dma_mr; + + /* QP numbers are shared by all IB ports */ + struct qib_qpn_table qpn_table; + struct qib_lkey_table lk_table; + struct list_head piowait; /* list for wait PIO buf */ + struct list_head dmawait; /* list for wait DMA */ + struct list_head txwait; /* list for wait qib_verbs_txreq */ + struct list_head memwait; /* list for wait kernel memory */ + struct list_head txreq_free; + struct timer_list mem_timer; + struct qib_qp **qp_table; + struct qib_pio_header *pio_hdrs; + dma_addr_t pio_hdrs_phys; + /* list of QPs waiting for RNR timer */ + spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */ + u32 qp_table_size; /* size of the hash table */ + u32 qp_rnd; /* random bytes for hash */ + spinlock_t qpt_lock; + + u32 n_piowait; + u32 n_txwait; + + u32 n_pds_allocated; /* number of PDs allocated for device */ + spinlock_t n_pds_lock; + u32 n_ahs_allocated; /* number of AHs allocated for device */ + spinlock_t n_ahs_lock; + u32 n_cqs_allocated; /* number of CQs allocated for device */ + spinlock_t n_cqs_lock; + u32 n_qps_allocated; /* number of QPs allocated for device */ + spinlock_t n_qps_lock; + u32 n_srqs_allocated; /* number of SRQs allocated for device */ + spinlock_t n_srqs_lock; + u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ + spinlock_t n_mcast_grps_lock; +}; + +struct qib_verbs_counters { + u64 symbol_error_counter; + u64 link_error_recovery_counter; + u64 link_downed_counter; + u64 port_rcv_errors; + u64 port_rcv_remphys_errors; + u64 port_xmit_discards; + u64 port_xmit_data; + u64 port_rcv_data; + u64 port_xmit_packets; + u64 port_rcv_packets; + u32 local_link_integrity_errors; + u32 excessive_buffer_overrun_errors; + u32 vl15_dropped; +}; + +static inline struct qib_mr *to_imr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct qib_mr, ibmr); +} + +static inline struct qib_pd *to_ipd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct qib_pd, ibpd); +} + +static inline struct qib_ah *to_iah(struct ib_ah *ibah) +{ + return container_of(ibah, struct qib_ah, ibah); +} + +static inline struct qib_cq *to_icq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct qib_cq, ibcq); +} + +static inline struct qib_srq *to_isrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct qib_srq, ibsrq); +} + +static inline struct qib_qp *to_iqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct qib_qp, ibqp); +} + +static inline struct qib_ibdev *to_idev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct qib_ibdev, ibdev); +} + +/* + * Send if not busy or waiting for I/O and either + * a RC response is pending or we can process send work requests. + */ +static inline int qib_send_ok(struct qib_qp *qp) +{ + return !(qp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT_IO)) && + (qp->s_hdrwords || (qp->s_flags & QIB_S_RESP_PENDING) || + !(qp->s_flags & QIB_S_ANY_WAIT_SEND)); +} + +extern struct workqueue_struct *qib_cq_wq; + +/* + * This must be called with s_lock held. + */ +static inline void qib_schedule_send(struct qib_qp *qp) +{ + if (qib_send_ok(qp)) + queue_work(ib_wq, &qp->s_work); +} + +static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) +{ + u16 p1 = pkey1 & 0x7FFF; + u16 p2 = pkey2 & 0x7FFF; + + /* + * Low 15 bits must be non-zero and match, and + * one of the two must be a full member. + */ + return p1 && p1 == p2 && ((__s16)pkey1 < 0 || (__s16)pkey2 < 0); +} + +void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl, + u32 qp1, u32 qp2, __be16 lid1, __be16 lid2); +void qib_cap_mask_chg(struct qib_ibport *ibp); +void qib_sys_guid_chg(struct qib_ibport *ibp); +void qib_node_desc_chg(struct qib_ibport *ibp); +int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad); +int qib_create_agents(struct qib_ibdev *dev); +void qib_free_agents(struct qib_ibdev *dev); + +/* + * Compare the lower 24 bits of the two values. + * Returns an integer <, ==, or > than zero. + */ +static inline int qib_cmp24(u32 a, u32 b) +{ + return (((int) a) - ((int) b)) << 8; +} + +struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid); + +int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords, + u64 *rwords, u64 *spkts, u64 *rpkts, + u64 *xmit_wait); + +int qib_get_counters(struct qib_pportdata *ppd, + struct qib_verbs_counters *cntrs); + +int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); + +int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); + +int qib_mcast_tree_empty(struct qib_ibport *ibp); + +__be32 qib_compute_aeth(struct qib_qp *qp); + +struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn); + +struct ib_qp *qib_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); + +int qib_destroy_qp(struct ib_qp *ibqp); + +int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err); + +int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + +int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr); + +unsigned qib_free_all_qps(struct qib_devdata *dd); + +void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt); + +void qib_free_qpn_table(struct qib_qpn_table *qpt); + +void qib_get_credit(struct qib_qp *qp, u32 aeth); + +unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult); + +void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail); + +void qib_put_txreq(struct qib_verbs_txreq *tx); + +int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr, + u32 hdrwords, struct qib_sge_state *ss, u32 len); + +void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, + int release); + +void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release); + +void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct qib_qp *qp); + +void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct qib_qp *qp); + +int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); + +void qib_rc_rnr_retry(unsigned long arg); + +void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr); + +void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err); + +int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr); + +void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct qib_qp *qp); + +int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr); + +int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr); + +int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, + struct qib_sge *isge, struct ib_sge *sge, int acc); + +int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, + u32 len, u64 vaddr, u32 rkey, int acc); + +int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); + +struct ib_srq *qib_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); + +int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, + struct ib_udata *udata); + +int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); + +int qib_destroy_srq(struct ib_srq *ibsrq); + +void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig); + +int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); + +struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, + int comp_vector, struct ib_ucontext *context, + struct ib_udata *udata); + +int qib_destroy_cq(struct ib_cq *ibcq); + +int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); + +int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); + +struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc); + +struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, int acc, u64 *iova_start); + +struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, + struct ib_udata *udata); + +int qib_dereg_mr(struct ib_mr *ibmr); + +struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); + +struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list( + struct ib_device *ibdev, int page_list_len); + +void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl); + +int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr); + +struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + +int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int list_len, u64 iova); + +int qib_unmap_fmr(struct list_head *fmr_list); + +int qib_dealloc_fmr(struct ib_fmr *ibfmr); + +void qib_release_mmap_info(struct kref *ref); + +struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size, + struct ib_ucontext *context, + void *obj); + +void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip, + u32 size, void *obj); + +int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + +int qib_get_rwqe(struct qib_qp *qp, int wr_id_only); + +void qib_migrate_qp(struct qib_qp *qp); + +int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, + int has_grh, struct qib_qp *qp, u32 bth0); + +u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, + struct ib_global_route *grh, u32 hwords, u32 nwords); + +void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, + u32 bth0, u32 bth2); + +void qib_do_send(struct work_struct *work); + +void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, + enum ib_wc_status status); + +void qib_send_rc_ack(struct qib_qp *qp); + +int qib_make_rc_req(struct qib_qp *qp); + +int qib_make_uc_req(struct qib_qp *qp); + +int qib_make_ud_req(struct qib_qp *qp); + +int qib_register_ib_device(struct qib_devdata *); + +void qib_unregister_ib_device(struct qib_devdata *); + +void qib_ib_rcv(struct qib_ctxtdata *, void *, void *, u32); + +void qib_ib_piobufavail(struct qib_devdata *); + +unsigned qib_get_npkeys(struct qib_devdata *); + +unsigned qib_get_pkey(struct qib_ibport *, unsigned); + +extern const enum ib_wc_opcode ib_qib_wc_opcode[]; + +/* + * Below HCA-independent IB PhysPortState values, returned + * by the f_ibphys_portstate() routine. + */ +#define IB_PHYSPORTSTATE_SLEEP 1 +#define IB_PHYSPORTSTATE_POLL 2 +#define IB_PHYSPORTSTATE_DISABLED 3 +#define IB_PHYSPORTSTATE_CFG_TRAIN 4 +#define IB_PHYSPORTSTATE_LINKUP 5 +#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6 +#define IB_PHYSPORTSTATE_CFG_DEBOUNCE 8 +#define IB_PHYSPORTSTATE_CFG_IDLE 0xB +#define IB_PHYSPORTSTATE_RECOVERY_RETRAIN 0xC +#define IB_PHYSPORTSTATE_RECOVERY_WAITRMT 0xE +#define IB_PHYSPORTSTATE_RECOVERY_IDLE 0xF +#define IB_PHYSPORTSTATE_CFG_ENH 0x10 +#define IB_PHYSPORTSTATE_CFG_WAIT_ENH 0x13 + +extern const int ib_qib_state_ops[]; + +extern __be64 ib_qib_sys_image_guid; /* in network order */ + +extern unsigned int ib_qib_lkey_table_size; + +extern unsigned int ib_qib_max_cqes; + +extern unsigned int ib_qib_max_cqs; + +extern unsigned int ib_qib_max_qp_wrs; + +extern unsigned int ib_qib_max_qps; + +extern unsigned int ib_qib_max_sges; + +extern unsigned int ib_qib_max_mcast_grps; + +extern unsigned int ib_qib_max_mcast_qp_attached; + +extern unsigned int ib_qib_max_srqs; + +extern unsigned int ib_qib_max_srq_sges; + +extern unsigned int ib_qib_max_srq_wrs; + +extern const u32 ib_qib_rnr_table[]; + +extern struct ib_dma_mapping_ops qib_dma_mapping_ops; + +#endif /* QIB_VERBS_H */ diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c new file mode 100644 index 00000000..dabb697b --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "qib.h" + +/** + * qib_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct + * @qp: the QP to link + */ +static struct qib_mcast_qp *qib_mcast_qp_alloc(struct qib_qp *qp) +{ + struct qib_mcast_qp *mqp; + + mqp = kmalloc(sizeof *mqp, GFP_KERNEL); + if (!mqp) + goto bail; + + mqp->qp = qp; + atomic_inc(&qp->refcount); + +bail: + return mqp; +} + +static void qib_mcast_qp_free(struct qib_mcast_qp *mqp) +{ + struct qib_qp *qp = mqp->qp; + + /* Notify qib_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + + kfree(mqp); +} + +/** + * qib_mcast_alloc - allocate the multicast GID structure + * @mgid: the multicast GID + * + * A list of QPs will be attached to this structure. + */ +static struct qib_mcast *qib_mcast_alloc(union ib_gid *mgid) +{ + struct qib_mcast *mcast; + + mcast = kmalloc(sizeof *mcast, GFP_KERNEL); + if (!mcast) + goto bail; + + mcast->mgid = *mgid; + INIT_LIST_HEAD(&mcast->qp_list); + init_waitqueue_head(&mcast->wait); + atomic_set(&mcast->refcount, 0); + mcast->n_attached = 0; + +bail: + return mcast; +} + +static void qib_mcast_free(struct qib_mcast *mcast) +{ + struct qib_mcast_qp *p, *tmp; + + list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) + qib_mcast_qp_free(p); + + kfree(mcast); +} + +/** + * qib_mcast_find - search the global table for the given multicast GID + * @ibp: the IB port structure + * @mgid: the multicast GID to search for + * + * Returns NULL if not found. + * + * The caller is responsible for decrementing the reference count if found. + */ +struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid) +{ + struct rb_node *n; + unsigned long flags; + struct qib_mcast *mcast; + + spin_lock_irqsave(&ibp->lock, flags); + n = ibp->mcast_tree.rb_node; + while (n) { + int ret; + + mcast = rb_entry(n, struct qib_mcast, rb_node); + + ret = memcmp(mgid->raw, mcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) + n = n->rb_left; + else if (ret > 0) + n = n->rb_right; + else { + atomic_inc(&mcast->refcount); + spin_unlock_irqrestore(&ibp->lock, flags); + goto bail; + } + } + spin_unlock_irqrestore(&ibp->lock, flags); + + mcast = NULL; + +bail: + return mcast; +} + +/** + * qib_mcast_add - insert mcast GID into table and attach QP struct + * @mcast: the mcast GID table + * @mqp: the QP to attach + * + * Return zero if both were added. Return EEXIST if the GID was already in + * the table but the QP was added. Return ESRCH if the QP was already + * attached and neither structure was added. + */ +static int qib_mcast_add(struct qib_ibdev *dev, struct qib_ibport *ibp, + struct qib_mcast *mcast, struct qib_mcast_qp *mqp) +{ + struct rb_node **n = &ibp->mcast_tree.rb_node; + struct rb_node *pn = NULL; + int ret; + + spin_lock_irq(&ibp->lock); + + while (*n) { + struct qib_mcast *tmcast; + struct qib_mcast_qp *p; + + pn = *n; + tmcast = rb_entry(pn, struct qib_mcast, rb_node); + + ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) { + n = &pn->rb_left; + continue; + } + if (ret > 0) { + n = &pn->rb_right; + continue; + } + + /* Search the QP list to see if this is already there. */ + list_for_each_entry_rcu(p, &tmcast->qp_list, list) { + if (p->qp == mqp->qp) { + ret = ESRCH; + goto bail; + } + } + if (tmcast->n_attached == ib_qib_max_mcast_qp_attached) { + ret = ENOMEM; + goto bail; + } + + tmcast->n_attached++; + + list_add_tail_rcu(&mqp->list, &tmcast->qp_list); + ret = EEXIST; + goto bail; + } + + spin_lock(&dev->n_mcast_grps_lock); + if (dev->n_mcast_grps_allocated == ib_qib_max_mcast_grps) { + spin_unlock(&dev->n_mcast_grps_lock); + ret = ENOMEM; + goto bail; + } + + dev->n_mcast_grps_allocated++; + spin_unlock(&dev->n_mcast_grps_lock); + + mcast->n_attached++; + + list_add_tail_rcu(&mqp->list, &mcast->qp_list); + + atomic_inc(&mcast->refcount); + rb_link_node(&mcast->rb_node, pn, n); + rb_insert_color(&mcast->rb_node, &ibp->mcast_tree); + + ret = 0; + +bail: + spin_unlock_irq(&ibp->lock); + + return ret; +} + +int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct qib_qp *qp = to_iqp(ibqp); + struct qib_ibdev *dev = to_idev(ibqp->device); + struct qib_ibport *ibp; + struct qib_mcast *mcast; + struct qib_mcast_qp *mqp; + int ret; + + if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) { + ret = -EINVAL; + goto bail; + } + + /* + * Allocate data structures since its better to do this outside of + * spin locks and it will most likely be needed. + */ + mcast = qib_mcast_alloc(gid); + if (mcast == NULL) { + ret = -ENOMEM; + goto bail; + } + mqp = qib_mcast_qp_alloc(qp); + if (mqp == NULL) { + qib_mcast_free(mcast); + ret = -ENOMEM; + goto bail; + } + ibp = to_iport(ibqp->device, qp->port_num); + switch (qib_mcast_add(dev, ibp, mcast, mqp)) { + case ESRCH: + /* Neither was used: OK to attach the same QP twice. */ + qib_mcast_qp_free(mqp); + qib_mcast_free(mcast); + break; + + case EEXIST: /* The mcast wasn't used */ + qib_mcast_free(mcast); + break; + + case ENOMEM: + /* Exceeded the maximum number of mcast groups. */ + qib_mcast_qp_free(mqp); + qib_mcast_free(mcast); + ret = -ENOMEM; + goto bail; + + default: + break; + } + + ret = 0; + +bail: + return ret; +} + +int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct qib_qp *qp = to_iqp(ibqp); + struct qib_ibdev *dev = to_idev(ibqp->device); + struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num); + struct qib_mcast *mcast = NULL; + struct qib_mcast_qp *p, *tmp; + struct rb_node *n; + int last = 0; + int ret; + + if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) { + ret = -EINVAL; + goto bail; + } + + spin_lock_irq(&ibp->lock); + + /* Find the GID in the mcast table. */ + n = ibp->mcast_tree.rb_node; + while (1) { + if (n == NULL) { + spin_unlock_irq(&ibp->lock); + ret = -EINVAL; + goto bail; + } + + mcast = rb_entry(n, struct qib_mcast, rb_node); + ret = memcmp(gid->raw, mcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) + n = n->rb_left; + else if (ret > 0) + n = n->rb_right; + else + break; + } + + /* Search the QP list. */ + list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) { + if (p->qp != qp) + continue; + /* + * We found it, so remove it, but don't poison the forward + * link until we are sure there are no list walkers. + */ + list_del_rcu(&p->list); + mcast->n_attached--; + + /* If this was the last attached QP, remove the GID too. */ + if (list_empty(&mcast->qp_list)) { + rb_erase(&mcast->rb_node, &ibp->mcast_tree); + last = 1; + } + break; + } + + spin_unlock_irq(&ibp->lock); + + if (p) { + /* + * Wait for any list walkers to finish before freeing the + * list element. + */ + wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); + qib_mcast_qp_free(p); + } + if (last) { + atomic_dec(&mcast->refcount); + wait_event(mcast->wait, !atomic_read(&mcast->refcount)); + qib_mcast_free(mcast); + spin_lock_irq(&dev->n_mcast_grps_lock); + dev->n_mcast_grps_allocated--; + spin_unlock_irq(&dev->n_mcast_grps_lock); + } + + ret = 0; + +bail: + return ret; +} + +int qib_mcast_tree_empty(struct qib_ibport *ibp) +{ + return ibp->mcast_tree.rb_node == NULL; +} diff --git a/drivers/infiniband/hw/qib/qib_wc_ppc64.c b/drivers/infiniband/hw/qib/qib_wc_ppc64.c new file mode 100644 index 00000000..673cf4c2 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_wc_ppc64.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This file is conditionally built on PowerPC only. Otherwise weak symbol + * versions of the functions exported from here are used. + */ + +#include "qib.h" + +/** + * qib_enable_wc - enable write combining for MMIO writes to the device + * @dd: qlogic_ib device + * + * Nothing to do on PowerPC, so just return without error. + */ +int qib_enable_wc(struct qib_devdata *dd) +{ + return 0; +} + +/** + * qib_unordered_wc - indicate whether write combining is unordered + * + * Because our performance depends on our ability to do write + * combining mmio writes in the most efficient way, we need to + * know if we are on a processor that may reorder stores when + * write combining. + */ +int qib_unordered_wc(void) +{ + return 1; +} diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c new file mode 100644 index 00000000..561b8bca --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This file is conditionally built on x86_64 only. Otherwise weak symbol + * versions of the functions exported from here are used. + */ + +#include +#include +#include + +#include "qib.h" + +/** + * qib_enable_wc - enable write combining for MMIO writes to the device + * @dd: qlogic_ib device + * + * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable + * write combining. + */ +int qib_enable_wc(struct qib_devdata *dd) +{ + int ret = 0; + u64 pioaddr, piolen; + unsigned bits; + const unsigned long addr = pci_resource_start(dd->pcidev, 0); + const size_t len = pci_resource_len(dd->pcidev, 0); + + /* + * Set the PIO buffers to be WCCOMB, so we get HT bursts to the + * chip. Linux (possibly the hardware) requires it to be on a power + * of 2 address matching the length (which has to be a power of 2). + * For rev1, that means the base address, for rev2, it will be just + * the PIO buffers themselves. + * For chips with two sets of buffers, the calculations are + * somewhat more complicated; we need to sum, and the piobufbase + * register has both offsets, 2K in low 32 bits, 4K in high 32 bits. + * The buffers are still packed, so a single range covers both. + */ + if (dd->piobcnt2k && dd->piobcnt4k) { + /* 2 sizes for chip */ + unsigned long pio2kbase, pio4kbase; + pio2kbase = dd->piobufbase & 0xffffffffUL; + pio4kbase = (dd->piobufbase >> 32) & 0xffffffffUL; + if (pio2kbase < pio4kbase) { + /* all current chips */ + pioaddr = addr + pio2kbase; + piolen = pio4kbase - pio2kbase + + dd->piobcnt4k * dd->align4k; + } else { + pioaddr = addr + pio4kbase; + piolen = pio2kbase - pio4kbase + + dd->piobcnt2k * dd->palign; + } + } else { /* single buffer size (2K, currently) */ + pioaddr = addr + dd->piobufbase; + piolen = dd->piobcnt2k * dd->palign + + dd->piobcnt4k * dd->align4k; + } + + for (bits = 0; !(piolen & (1ULL << bits)); bits++) + /* do nothing */ ; + + if (piolen != (1ULL << bits)) { + piolen >>= bits; + while (piolen >>= 1) + bits++; + piolen = 1ULL << (bits + 1); + } + if (pioaddr & (piolen - 1)) { + u64 atmp; + atmp = pioaddr & ~(piolen - 1); + if (atmp < addr || (atmp + piolen) > (addr + len)) { + qib_dev_err(dd, "No way to align address/size " + "(%llx/%llx), no WC mtrr\n", + (unsigned long long) atmp, + (unsigned long long) piolen << 1); + ret = -ENODEV; + } else { + pioaddr = atmp; + piolen <<= 1; + } + } + + if (!ret) { + int cookie; + + cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0); + if (cookie < 0) { + { + qib_devinfo(dd->pcidev, + "mtrr_add() WC for PIO bufs " + "failed (%d)\n", + cookie); + ret = -EINVAL; + } + } else { + dd->wc_cookie = cookie; + dd->wc_base = (unsigned long) pioaddr; + dd->wc_len = (unsigned long) piolen; + } + } + + return ret; +} + +/** + * qib_disable_wc - disable write combining for MMIO writes to the device + * @dd: qlogic_ib device + */ +void qib_disable_wc(struct qib_devdata *dd) +{ + if (dd->wc_cookie) { + int r; + + r = mtrr_del(dd->wc_cookie, dd->wc_base, + dd->wc_len); + if (r < 0) + qib_devinfo(dd->pcidev, + "mtrr_del(%lx, %lx, %lx) failed: %d\n", + dd->wc_cookie, dd->wc_base, + dd->wc_len, r); + dd->wc_cookie = 0; /* even on failure */ + } +} + +/** + * qib_unordered_wc - indicate whether write combining is ordered + * + * Because our performance depends on our ability to do write combining mmio + * writes in the most efficient way, we need to know if we are on an Intel + * or AMD x86_64 processor. AMD x86_64 processors flush WC buffers out in + * the order completed, and so no special flushing is required to get + * correct ordering. Intel processors, however, will flush write buffers + * out in "random" orders, and so explicit ordering is needed at times. + */ +int qib_unordered_wc(void) +{ + return boot_cpu_data.x86_vendor != X86_VENDOR_AMD; +} -- cgit