Benchmark


ストレージ関連のベンチマーク

Windows上で、PX-128M2P x 4台を使用したベンチ結果

 RAID Card


 ZFS


 InfiniBand



 10GbE


Soft RoCE

  • ESXi 4.1u2上でさっくり環境作ってチェックしてみました。
root@ubuntu2:~/OFED-1.5.2-rxe# ib_read_bw 192.168.1.2
------------------------------------------------------------------
                    RDMA_Read BW Test
 Connection type : RC
 Link type is ETH
 Using gid index 0 as source GID
 Number of outstanding reads is 4
 local address: LID 0000 QPN 0x0010 PSN 0x959124 OUT 0x04 RKey 0x000104 VAddr 0x007f4768451000
 GID: 254:128:00:00:00:00:00:00:02:12:41:255:254:142:29:245
 remote address: LID 0000 QPN 0x0010 PSN 0x3034b2 OUT 0x04 RKey 0x000102 VAddr 0x007fe54643c000
 GID: 254:128:00:00:00:00:00:00:02:12:41:255:254:67:180:181
 Mtu : 1024
------------------------------------------------------------------
 #bytes     #iterations    BW peak[MB/sec]    BW average[MB/sec]
 65536      1000           390.28             379.58
------------------------------------------------------------------
root@ubuntu2:~/OFED-1.5.2-rxe# ib_read_lat 192.168.1.2
------------------------------------------------------------------
                    RDMA_Read Latency Test
 Connection type : RC
 Link type is ETH
 Using gid index 0 as source GID
 Number of outstanding reads is 4
 local address: LID 0000 QPN 0x0010 PSN 0x37fe66 OUT 0x04 RKey 0x000108 VAddr 0x00000000771000
 GID: 254:128:00:00:00:00:00:00:02:12:41:255:254:142:29:245
 remote address: LID 0000 QPN 0x0010 PSN 0x886dc3 OUT 0x04 RKey 0x000104 VAddr 0x00000000dd8000
 GID: 254:128:00:00:00:00:00:00:02:12:41:255:254:67:180:181
Mtu : 1024
------------------------------------------------------------------
 #bytes #iterations    t_min[usec]    t_max[usec]  t_typical[usec]
 2       1000          23.58          8241.60      26.47
------------------------------------------------------------------
root@ubuntu2:~/OFED-1.5.2-rxe# ib_send_bw 192.168.1.2
------------------------------------------------------------------
                    Send BW Test
 Connection type : RC
 Inline data is used up to 0 bytes message
 Link type is ETH
 Using gid index 0 as source GID
 local address: LID 0000 QPN 0x0010 PSN 0x59cbdf
 GID: 254:128:00:00:00:00:00:00:02:12:41:255:254:142:29:245
 remote address: LID 0000 QPN 0x0010 PSN 0x1f9e44
 GID: 254:128:00:00:00:00:00:00:02:12:41:255:254:67:180:181
 Mtu : 1024
------------------------------------------------------------------
 #bytes     #iterations    BW peak[MB/sec]    BW average[MB/sec]

ここで、接続しに行った側(ib_send_bwを実行したクライアント側)のOSがクラッシュした。

 SRP

TargetはOpenIndiana 151aで、InitiatorはCentOS 6.2を使用
Initiator側の環境は以下のとおり。
OFED 1.5.4

# ibv_devinfo -vvv
hca_id: mlx4_0
        transport:                      InfiniBand (0)
        fw_ver:                         2.9.1000
        node_guid:                      001e:0bff:ff4d:61a8
        sys_image_guid:                 001e:0bff:ff4d:61ab
        vendor_id:                      0x02c9
        vendor_part_id:                 25418
        hw_ver:                         0xA0
        board_id:                       HP_09D0000001
        phys_port_cnt:                  2
        max_mr_size:                    0xffffffffffffffff
        page_size_cap:                  0xfffffe00
        max_qp:                         260032
        max_qp_wr:                      16351
        device_cap_flags:               0x007c9c76
        max_sge:                        32
        max_sge_rd:                     0
        max_cq:                         65408
        max_cqe:                        4194303
        max_mr:                         524272
        max_pd:                         32764
        max_qp_rd_atom:                 16
        max_ee_rd_atom:                 0
        max_res_rd_atom:                4160512
        max_qp_init_rd_atom:            128
        max_ee_init_rd_atom:            0
        atomic_cap:                     ATOMIC_HCA (1)
        max_ee:                         0
        max_rdd:                        0
        max_mw:                         0
        max_raw_ipv6_qp:                0
        max_raw_ethy_qp:                2
        max_mcast_grp:                  8192
        max_mcast_qp_attach:            120
        max_total_mcast_qp_attach:      983040
        max_ah:                         0
        max_fmr:                        0
        max_srq:                        65472
        max_srq_wr:                     16383
        max_srq_sge:                    31
        max_pkeys:                      128
        local_ca_ack_delay:             15
                port:   1
                        state:                  PORT_ACTIVE (4)
                        max_mtu:                2048 (4)
                        active_mtu:             2048 (4)
                        sm_lid:                 2
                        port_lid:               2
                        port_lmc:               0x00
                        link_layer:             IB
                        max_msg_sz:             0x40000000
                        port_cap_flags:         0x0251086a
                        max_vl_num:             8 (4)
                        bad_pkey_cntr:          0x0
                        qkey_viol_cntr:         0x0
                        sm_sl:                  0
                        pkey_tbl_len:           128
                        gid_tbl_len:            128
                        subnet_timeout:         18
                        init_type_reply:        0
                        active_width:           4X (2)
                        active_speed:           5.0 Gbps (2)
                        phys_state:             LINK_UP (5)
                        GID[  0]:               fe80:0000:0000:0000:001e:0bff:ff4d:61a9

                port:   2
                        state:                  PORT_DOWN (1)
                        max_mtu:                2048 (4)
                        active_mtu:             2048 (4)
                        sm_lid:                 0
                        port_lid:               0
                        port_lmc:               0x00
                        link_layer:             IB
                        max_msg_sz:             0x40000000
                        port_cap_flags:         0x02510868
                        max_vl_num:             8 (4)
                        bad_pkey_cntr:          0x0
                        qkey_viol_cntr:         0x0
                        sm_sl:                  0
                        pkey_tbl_len:           128
                        gid_tbl_len:            128
                        subnet_timeout:         0
                        init_type_reply:        0
                        active_width:           4X (2)
                        active_speed:           2.5 Gbps (1)
                        phys_state:             POLLING (2)
                        GID[  0]:               fe80:0000:0000:0000:001e:0bff:ff4d:61aa

# lspci
00:00.0 Host bridge: Intel Corporation Xeon E3-1200 Processor Family DRAM Controller (rev 09)
00:01.0 PCI bridge: Intel Corporation Xeon E3-1200/2nd Generation Core Processor Family PCI Express Root Port (rev 09)
00:02.0 VGA compatible controller: Intel Corporation Xeon E3-1200 Processor Family Integrated Graphics Controller (rev 09)
00:16.0 Communication controller: Intel Corporation 6 Series/C200 Series Chipset Family MEI Controller #1 (rev 04)
00:19.0 Ethernet controller: Intel Corporation 82579LM Gigabit Network Connection (rev 05)
00:1a.0 USB controller: Intel Corporation 6 Series/C200 Series Chipset Family USB Enhanced Host Controller #2 (rev 05)
00:1c.0 PCI bridge: Intel Corporation 6 Series/C200 Series Chipset Family PCI Express Root Port 1 (rev b5)
00:1c.1 PCI bridge: Intel Corporation 6 Series/C200 Series Chipset Family PCI Express Root Port 2 (rev b5)
00:1d.0 USB controller: Intel Corporation 6 Series/C200 Series Chipset Family USB Enhanced Host Controller #1 (rev 05)
00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev a5)
00:1f.0 ISA bridge: Intel Corporation C206 Chipset Family LPC Controller (rev 05)
00:1f.2 SATA controller: Intel Corporation 6 Series/C200 Series Chipset Family 6 port SATA AHCI Controller (rev 05)
00:1f.3 SMBus: Intel Corporation 6 Series/C200 Series Chipset Family SMBus Controller (rev 05)
01:00.0 Network controller: Mellanox Technologies MT25418 [ConnectX VPI PCIe 2.0 2.5GT/s - IB DDR / 10GigE] (rev a0)
03:00.0 Ethernet controller: Intel Corporation 82574L Gigabit Network Connection

# lspci -vvv

01:00.0 Network controller: Mellanox Technologies MT25418 [ConnectX VPI PCIe 2.0 2.5GT/s - IB DDR / 10GigE] (rev a0)
        Subsystem: Mellanox Technologies MT25418 [ConnectX VPI PCIe 2.0 2.5GT/s - IB DDR / 10GigE]
        Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
        Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
        Latency: 0, Cache Line Size: 64 bytes
        Interrupt: pin A routed to IRQ 16
        Region 0: Memory at fe500000 (64-bit, non-prefetchable) [size=1M]
        Region 2: Memory at f0000000 (64-bit, prefetchable) [size=8M]
        Expansion ROM at fe400000 [disabled] [size=1M]
        Capabilities: [40] Power Management version 3
                Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
                Status: D0 NoSoftRst- PME-Enable- DSel=0 DScale=0 PME-
        Capabilities: [48] Vital Product Data
                Product Name: Eagle DDR
                Read-only fields:
                        [PN] Part number: 448397-B21
                        [EC] Engineering changes: C1
                        [SN] Serial number: IL2090503A
                        [V0] Vendor specific: PCIe x8
                        [RV] Reserved: checksum good, 0 byte(s) reserved
                Read/write fields:
                        [V1] Vendor specific: N/A
                        [YA] Asset tag: N/A
                        [RW] Read-write area: 111 byte(s) free
                End
        Capabilities: [9c] MSI-X: Enable+ Count=128 Masked-
                Vector table: BAR=0 offset=0007c000
                PBA: BAR=0 offset=0007d000
        Capabilities: [60] Express (v2) Endpoint, MSI 00
                DevCap: MaxPayload 256 bytes, PhantFunc 0, Latency L0s <64ns, L1 unlimited
                        ExtTag- AttnBtn- AttnInd- PwrInd- RBE+ FLReset+
                DevCtl: Report errors: Correctable- Non-Fatal- Fatal- Unsupported-
                        RlxdOrd- ExtTag- PhantFunc- AuxPwr- NoSnoop- FLReset-
                        MaxPayload 128 bytes, MaxReadReq 512 bytes
                DevSta: CorrErr- UncorrErr- FatalErr- UnsuppReq- AuxPwr- TransPend-
                LnkCap: Port #8, Speed 2.5GT/s, Width x8, ASPM L0s, Latency L0 unlimited, L1 unlimited
                        ClockPM- Surprise- LLActRep- BwNot-
                LnkCtl: ASPM Disabled; RCB 64 bytes Disabled- Retrain- CommClk-
                        ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt-
                LnkSta: Speed 2.5GT/s, Width x8, TrErr- Train- SlotClk- DLActive- BWMgmt- ABWMgmt-
                DevCap2: Completion Timeout: Range ABCD, TimeoutDis+
                DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-
                LnkCtl2: Target Link Speed: 2.5GT/s, EnterCompliance- SpeedDis-, Selectable De-emphasis: -6dB
                         Transmit Margin: Normal Operating Range, EnterModifiedCompliance- ComplianceSOS-
                         Compliance De-emphasis: -6dB
                LnkSta2: Current De-emphasis Level: -6dB
        Capabilities: [100] Alternative Routing-ID Interpretation (ARI)
                ARICap: MFVC- ACS-, Next Function: 1
                ARICtl: MFVC- ACS-, Function Group: 0
        Kernel driver in use: mlx4_core
        Kernel modules: mlx4_en, mlx4_core


scsi host7: ib_srp: new target: id_ext 0002c903000bc532 ioc_guid 0002c903000bc532 pkey ffff service_id 0002c903000bc532 dgid fe80:0000:0000:0000:0002:c903:000b:c533
scsi7 : SRP.T10:0002C903000BC532
scsi 7:0:0:0: Direct-Access     OI       COMSTAR          1.0  PQ: 0 ANSI: 5
sd 7:0:0:0: Attached scsi generic sg1 type 0
scsi 7:0:0:1: Direct-Access     OI       COMSTAR          1.0  PQ: 0 ANSI: 5
sd 7:0:0:1: Attached scsi generic sg2 type 0
sd 7:0:0:1: [sdc] 167772160 512-byte logical blocks: (85.8 GB/80.0 GiB)
sd 7:0:0:0: [sdb] 18874368 512-byte logical blocks: (9.66 GB/9.00 GiB)
sd 7:0:0:1: [sdc] Write Protect is off
sd 7:0:0:1: [sdc] Mode Sense: 53 00 00 00
sd 7:0:0:0: [sdb] Write Protect is off
sd 7:0:0:0: [sdb] Mode Sense: 53 00 00 00
sd 7:0:0:1: [sdc] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
sd 7:0:0:0: [sdb] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
 sdb:
 sdc: unknown partition table
 unknown partition table
sd 7:0:0:1: [sdc] Attached SCSI disk
sd 7:0:0:0: [sdb] Attached SCSI disk
 sdb: sdb1
 sdc: sdc1

上記例では、sdbがRamdiskでsdcがSSDx4のStripeになっている。
ソフトウェア的な限界かどうかは分からないが、スループットは750MB/sくらいで頭打ちになった。

 iSCSI

Targetはoi151a3でInitiatorはUbuntu 12.04LTS
接続はiSER使用。Target側は普通に使用してる(zil ssd + HDD)pool。
Initiator側もzfsのpoolという、やや変則的な構成。
ちなみにサイズが20GBだと、メモリのキャッシュに載ってしまい、速度が当てにならない値になる。

root@kvm-server-01:~# fio -filename=/rzpool/file20G -rw=randwrite -bs=1m -size=20G -numjobs=4 -runtime=10 -group_reporting -name=file1
file1: (g=0): rw=randwrite, bs=1M-1M/1M-1M, ioengine=sync, iodepth=1
...
file1: (g=0): rw=randwrite, bs=1M-1M/1M-1M, ioengine=sync, iodepth=1
fio 1.59
Starting 4 processes
Jobs: 4 (f=4): [wwww] [100.0% done] [0K/77594K /s] [0 /74  iops] [eta 00m:00s]
file1: (groupid=0, jobs=4): err= 0: pid=2928
  write: io=948224KB, bw=88561KB/s, iops=86 , runt= 10707msec
    clat (usec): min=363 , max=1175.5K, avg=46137.22, stdev=184991.24
     lat (usec): min=402 , max=1175.1K, avg=46238.06, stdev=184983.00
    bw (KB/s) : min=11623, max=47104, per=25.70%, avg=22760.21, stdev=8046.29
  cpu          : usr=0.23%, sys=1.55%, ctx=1062, majf=0, minf=103
  IO depths    : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     issued r/w/d: total=0/926/0, short=0/0/0
     lat (usec): 500=10.04%, 750=37.26%, 1000=29.81%
     lat (msec): 2=4.86%, 4=1.30%, 10=1.94%, 20=0.76%, 50=8.42%
     lat (msec): 100=0.43%, 750=2.16%, 1000=2.16%, 2000=0.86%

Run status group 0 (all jobs):
  WRITE: io=948224KB, aggrb=88561KB/s, minb=90686KB/s, maxb=90686KB/s, mint=10707msec, maxt=10707msec

今度はサイズを50GBにしてチェック

root@kvm-server-01:~# fio -filename=/rzpool/file20G -rw=randwrite -bs=1m -size=50G -numjobs=4 -runtime=10 -group_reporting -name=file1
file1: (g=0): rw=randwrite, bs=1M-1M/1M-1M, ioengine=sync, iodepth=1
...
file1: (g=0): rw=randwrite, bs=1M-1M/1M-1M, ioengine=sync, iodepth=1
fio 1.59
Starting 4 processes
Jobs: 4 (f=4): [wwww] [100.0% done] [0K/101.4M /s] [0 /99  iops] [eta 00m:00s]
file1: (groupid=0, jobs=4): err= 0: pid=2970
  write: io=1033.0MB, bw=98436KB/s, iops=96 , runt= 10746msec
    clat (usec): min=240 , max=1302.8K, avg=41540.64, stdev=164526.01
     lat (usec): min=273 , max=1302.9K, avg=41601.14, stdev=164527.85
    bw (KB/s) : min=13473, max=61527, per=25.89%, avg=25481.95, stdev=10701.77
  cpu          : usr=0.16%, sys=1.54%, ctx=2060, majf=0, minf=103
  IO depths    : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     issued r/w/d: total=0/1033/0, short=0/0/0
     lat (usec): 250=0.19%, 500=7.94%, 750=3.39%, 1000=6.58%
     lat (msec): 2=13.36%, 4=9.87%, 10=35.33%, 20=8.81%, 50=10.07%
     lat (msec): 100=0.19%, 750=2.71%, 1000=0.77%, 2000=0.77%

Run status group 0 (all jobs):
  WRITE: io=1033.0MB, aggrb=98435KB/s, minb=100798KB/s, maxb=100798KB/s, mint=10746msec, maxt=10746msec

130GBでテスト

root@kvm-server-01:~# fio -filename=/rzpool/file20G -rw=randwrite -bs=1m -size=130G -numjobs=4 -runtime=10 -group_reporting -name=file1
file1: (g=0): rw=randwrite, bs=1M-1M/1M-1M, ioengine=sync, iodepth=1
...
file1: (g=0): rw=randwrite, bs=1M-1M/1M-1M, ioengine=sync, iodepth=1
fio 1.59
Starting 4 processes
file1: Laying out IO file(s) (1 file(s) / 133120MB)
Jobs: 4 (f=4): [wwww] [100.0% done] [0K/63963K /s] [0 /61  iops] [eta 00m:00s]
file1: (groupid=0, jobs=4): err= 0: pid=3018
  write: io=606208KB, bw=57141KB/s, iops=55 , runt= 10609msec
    clat (usec): min=231 , max=1176.4K, avg=71602.53, stdev=245192.47
     lat (usec): min=265 , max=1176.5K, avg=71669.47, stdev=245192.63
    bw (KB/s) : min= 1917, max=25202, per=24.84%, avg=14191.48, stdev=5266.84
  cpu          : usr=0.10%, sys=0.89%, ctx=1191, majf=0, minf=95
  IO depths    : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     issued r/w/d: total=0/592/0, short=0/0/0
     lat (usec): 250=1.86%, 500=18.07%, 750=7.09%, 1000=3.21%
     lat (msec): 2=11.49%, 4=14.19%, 10=22.80%, 20=2.20%, 50=10.64%
     lat (msec): 100=1.01%, 250=0.68%, 750=2.03%, 1000=0.68%, 2000=4.05%

Run status group 0 (all jobs):
  WRITE: io=606208KB, aggrb=57140KB/s, minb=58512KB/s, maxb=58512KB/s, mint=10609msec, maxt=10609msec