使用 x86 汇编实现 C# 的快速内存拷贝
日期:2014-12-24点击次数:1657
大家好,我是Oleksandr Karpov,这个是我第一次发表文章,希望大家喜欢。
在这里,我将为大家展示和介绍怎样在 C# 和 .NET 下使用汇编快速拷贝数据。在我的实例里,我用一个应用程序创建了一段视频,里面包含图片、视频和声音。
当然,如果你也遇到需要在 C# 中使用汇编的情况,这个方法可以为你提供一个快速、简单的解决途径。
背景
理解本文的内容, 最好具备以下知识: 汇编语言, 内存对齐, c#, windows 和 .net 高级技巧(advanced techniques).
要提高数据复制(copy-paste)的速度, 我们需要将内存地址按 16 个字节对齐. 否则, 速度不会有明显的改变. (我的例子大概只快 1.02 倍)
Pentium III+ (KNI/MMX2) 和 AMD Athlon (AMD EMMX) 这两种处理器都支持本文代码用到的 SSE 指令集.
我用配置为: Pentium Dual-Core E5800 3.2GHz, 4GB 双通道内存的计算机做测试, 16 个字节内存对齐的速度要比标准方式快 1.5 倍, 而非内存对齐方式的速度几乎没有变化(1.02倍).
使用代码
这是一个完整的演示测试,向你展示了性能测试以及如何使用。
FastMemCopy 类包含了用于快速内存拷贝逻辑的所有内容。
首先你需要创建一个默认的Windows Forms应用程序工程,在窗体上放两个按钮,一个PictureBox 控件,因为我们将用图片来测试。
声明几个字段先:
-
// Path of the image file most recently chosen in the open-file dialog.
private string bitmapPath;

// Source bitmap (loaded from bitmapPath by OpenImage).
private Bitmap bmp;

// Destination bitmap; OpenImage constructs it over the memory of `buffer`.
private Bitmap bmp2;

// LockBits results for bmp and bmp2 respectively.
private BitmapData bmpd;
private BitmapData bmpd2;

// Pixel storage backing bmp2 (allocated in OpenImage as width * 4 * height bytes).
private byte[] buffer;
现在创建两个方法用来处理按钮的点击事件。
标准方法如下:
-
/// <summary>
/// Click handler for the "standard" button: lets the user pick an image file, runs the
/// Marshal.Copy-based benchmark (<c>CopyImage</c>) and displays the destination bitmap.
/// </summary>
private void btnStandard_Click(object sender, EventArgs e)
{
    using (OpenFileDialog picker = new OpenFileDialog())
    {
        bool accepted = picker.ShowDialog() == System.Windows.Forms.DialogResult.OK;
        if (!accepted)
        {
            // Dialog was dismissed: keep the current path and image untouched.
            return;
        }

        bitmapPath = picker.FileName;
    }

    OpenImage();

    // NOTE: despite the names, UnlockBitmap() is the helper that calls LockBits and
    // LockBitmap() is the one that calls UnlockBits.
    UnlockBitmap();
    CopyImage();
    LockBitmap();

    pictureBox1.Image = bmp2;
}
快速方法如下:
-
/// <summary>
/// Click handler for the "fast" button: lets the user pick an image file, runs the
/// machine-code benchmark (<c>FastCopyImage</c>) and displays the destination bitmap.
/// </summary>
private void btnFast_Click(object sender, EventArgs e)
{
    using (OpenFileDialog picker = new OpenFileDialog())
    {
        bool accepted = picker.ShowDialog() == System.Windows.Forms.DialogResult.OK;
        if (!accepted)
        {
            // Dialog was dismissed: keep the current path and image untouched.
            return;
        }

        bitmapPath = picker.FileName;
    }

    OpenImage();

    // NOTE: despite the names, UnlockBitmap() is the helper that calls LockBits and
    // LockBitmap() is the one that calls UnlockBits.
    UnlockBitmap();
    FastCopyImage();
    LockBitmap();

    pictureBox1.Image = bmp2;
}
好的,现在我们有按钮并且也有了事件处理,下面来实现打开图片、锁定、解锁它们的方法,以及标准拷贝方法:
打开一个图片:
-
// Pins `buffer` for as long as bmp2 wraps its memory. Without a pinned handle the GC is
// free to relocate the array while the bitmap (and the native copy routine) still use the
// raw address obtained for it.
private GCHandle bufferPin;

/// <summary>
/// Releases any previously loaded bitmaps, loads the file at <c>bitmapPath</c> into
/// <c>bmp</c>, and creates <c>bmp2</c> as a 32bpp ARGB bitmap of the same size whose pixel
/// memory is the managed array <c>buffer</c>.
/// </summary>
void OpenImage()
{
    pictureBox1.Image = null;

    if (bmp != null)
    {
        bmp.Dispose();
        bmp = null;
    }

    if (bmp2 != null)
    {
        bmp2.Dispose();
        bmp2 = null;
    }

    // Only unpin after bmp2 is disposed: the bitmap reads and writes the pinned memory
    // for its whole lifetime.
    if (bufferPin.IsAllocated)
    {
        bufferPin.Free();
    }

    buffer = null;

    // Kept from the original demo: reclaim the previous image buffers before allocating
    // the next ones so that successive benchmark runs start from a similar heap state.
    GC.Collect(GC.MaxGeneration, GCCollectionMode.Forced);

    bmp = (Bitmap)Bitmap.FromFile(bitmapPath);

    // 4 bytes per pixel (Format32bppArgb), rows tightly packed.
    int stride = bmp.Width * 4;
    buffer = new byte[stride * bmp.Height];

    // Pin first, then hand the address to GDI+. The original passed the array to
    // Marshal.UnsafeAddrOfPinnedArrayElement without ever pinning it.
    bufferPin = GCHandle.Alloc(buffer, GCHandleType.Pinned);
    bmp2 = new Bitmap(bmp.Width, bmp.Height, stride, PixelFormat.Format32bppArgb,
        bufferPin.AddrOfPinnedObject());
}
锁定和解锁位图:
-
/// <summary>
/// Calls LockBits on both bitmaps over the full image area (read/write, 32bpp ARGB) and
/// stores the results in <c>bmpd</c> and <c>bmpd2</c>.
/// NOTE: the method name is inverted relative to what it does; it is kept because callers
/// use it.
/// </summary>
void UnlockBitmap()
{
    // Both bitmaps are locked with bmp's dimensions; OpenImage creates bmp2 with the same size.
    Rectangle wholeImage = new Rectangle(0, 0, bmp.Width, bmp.Height);
    const PixelFormat format = PixelFormat.Format32bppArgb;

    bmpd = bmp.LockBits(wholeImage, ImageLockMode.ReadWrite, format);
    bmpd2 = bmp2.LockBits(wholeImage, ImageLockMode.ReadWrite, format);
}
-
-
/// <summary>
/// Calls UnlockBits on both bitmaps using the lock data stored in <c>bmpd</c> / <c>bmpd2</c>.
/// NOTE: the method name is inverted relative to what it does; it is kept because callers
/// use it.
/// </summary>
void LockBitmap()
{
    BitmapData sourceLock = bmpd;
    bmp.UnlockBits(sourceLock);

    BitmapData destinationLock = bmpd2;
    bmp2.UnlockBits(destinationLock);
}
从一个图片拷贝数据到另一个图片,并且显示测得的时间:
-
/// <summary>
/// Baseline benchmark: copies the locked pixels of <c>bmp</c> into <c>buffer</c> ten times
/// with Marshal.Copy and shows the elapsed Stopwatch ticks in a message box.
/// </summary>
void CopyImage()
{
    Stopwatch timer = new Stopwatch();
    timer.Start();

    // Ten passes over the same data, all measured together.
    for (int pass = 1; pass <= 10; pass++)
    {
        System.Runtime.InteropServices.Marshal.Copy(bmpd.Scan0, buffer, 0, buffer.Length);
    }

    timer.Stop();

    long elapsedTicks = timer.ElapsedTicks;
    MessageBox.Show(elapsedTicks.ToString());
}
这就是标准拷贝方法。其实一点也不复杂,我们使用了知名的 System.Runtime.InteropServices.Marshal.Copy 方法。
以及又一个“中间方法(middle-method)”以用于快速拷贝逻辑:
-
/// <summary>
/// Forwards the locked pixel pointers of <c>bmp</c> (source) and <c>bmp2</c> (destination),
/// plus the byte count of <c>buffer</c>, to <c>FastMemCopy.FastMemoryCopy</c>.
/// </summary>
void FastCopyImage()
{
    IntPtr source = bmpd.Scan0;
    IntPtr destination = bmpd2.Scan0;
    int byteCount = buffer.Length;

    FastMemCopy.FastMemoryCopy(source, destination, byteCount);
}
现在,来实现FastMemCopy类。下面是类的声明以及我们将会在类中使用到的一些类型:
-
internal static class FastMemCopy
-
{
-
// Flag values passed as the flAllocationType argument of NativeMethods.VirtualAlloc.
// Members are listed in ascending numeric order.
[Flags]
private enum AllocationTypes : uint
{
    Commit = 0x1000,
    Reserve = 0x2000,
    Reset = 0x80000,
    TopDown = 0x100000,
    WriteWatch = 0x200000,
    Physical = 0x400000,
    LargePages = 0x20000000
}
-
-
// Flag values passed as the flProtect argument of NativeMethods.VirtualAlloc.
// Members are listed in ascending numeric order.
[Flags]
private enum MemoryProtections : uint
{
    NoAccess = 0x01,
    ReadOnly = 0x02,
    ReadWrite = 0x04,
    WriteCopy = 0x08,
    Execute = 0x10,
    ExecuteRead = 0x20,
    ExecuteReadWrite = 0x40,
    ExecuteWriteCopy = 0x80,
    // NOTE(review): "Guart" looks like a misspelling of "Guard"; the identifier is left
    // unchanged so that any existing reference keeps compiling.
    GuartModifierflag = 0x100,
    NoCacheModifierflag = 0x200,
    WriteCombineModifierflag = 0x400
}
-
-
// Flag values passed as the flFreeType argument of NativeMethods.VirtualFree.
[Flags]
private enum FreeTypes : uint
{
    Decommit = 0x4000,
    Release = 0x8000
}
-
-
// Managed signature through which the copied machine code is invoked
// (parameterless, cdecl, no return value).
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
private unsafe delegate void FastMemCopyDelegate();
-
-
/// <summary>
/// P/Invoke declarations for the kernel32 virtual-memory functions used to obtain and
/// release the executable page that hosts the copy routine.
/// </summary>
private static class NativeMethods
{
    // SetLastError lets callers read the failure reason via Marshal.GetLastWin32Error().
    [DllImport("kernel32.dll", SetLastError = true)]
    internal static extern IntPtr VirtualAlloc(
        IntPtr lpAddress,
        UIntPtr dwSize,
        AllocationTypes flAllocationType,
        MemoryProtections flProtect);

    // Declared consistently with VirtualAlloc: same library name, and SetLastError so a
    // false return can be diagnosed. The parameter types are unchanged so existing call
    // sites keep compiling.
    [DllImport("kernel32.dll", SetLastError = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    internal static extern bool VirtualFree(
        IntPtr lpAddress,
        uint dwSize,
        FreeTypes flFreeType);
}
现在声明方法本身:
-
/// <summary>
/// Benchmarks the x86 copy routine: copies <c>x86_FastMemCopy_New</c> into freshly allocated
/// executable memory, patches the source address, destination address and byte count into
/// the routine's trailing data slots, invokes it ten times, and shows the elapsed
/// Stopwatch ticks in a message box.
/// </summary>
/// <param name="src">Address of the first byte to read.</param>
/// <param name="dst">Address of the first byte to write.</param>
/// <param name="nBytes">Number of bytes to copy; must not be negative.</param>
/// <exception cref="ApplicationException">The process is 64-bit.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="nBytes"/> is negative.</exception>
/// <remarks>
/// Failures inside the allocate/patch/run sequence are reported with a message box rather
/// than propagated, as in the original demo.
/// </remarks>
public static unsafe void FastMemoryCopy(IntPtr src, IntPtr dst, int nBytes)
{
    if (IntPtr.Size == 8)
    {
        throw new ApplicationException("x64 is not supported yet!");
    }

    if (IntPtr.Size != 4)
    {
        return;
    }

    if (nBytes < 0)
    {
        // The native routine would treat the value as a huge unsigned count.
        throw new ArgumentOutOfRangeException("nBytes");
    }

    int codeLength = x86_FastMemCopy_New.Length;

    // The base address is kept unmodified so it can be handed back to VirtualFree.
    // The original advanced this pointer while patching and then released the moved
    // pointer with a non-zero size, which MEM_RELEASE rejects, so the page leaked.
    IntPtr codeBase = IntPtr.Zero;

    try
    {
        codeBase = NativeMethods.VirtualAlloc(
            IntPtr.Zero,
            new UIntPtr((uint)codeLength),
            AllocationTypes.Commit | AllocationTypes.Reserve,
            MemoryProtections.ExecuteReadWrite);

        if (codeBase == IntPtr.Zero)
        {
            throw new InvalidOperationException(
                "VirtualAlloc failed (Win32 error " + Marshal.GetLastWin32Error() + ").");
        }

        Marshal.Copy(x86_FastMemCopy_New, 0, codeBase, codeLength);

        FastMemCopyDelegate copyRoutine =
            (FastMemCopyDelegate)Marshal.GetDelegateForFunctionPointer(
                codeBase, typeof(FastMemCopyDelegate));

        // The last 24 bytes of the routine are three 8-byte slots holding, from lowest to
        // highest address: src, dst, nBytes. Only the low 4 bytes of each slot are written.
        IntPtr srcSlot = codeBase + (codeLength - 24);
        IntPtr dstSlot = codeBase + (codeLength - 16);
        IntPtr countSlot = codeBase + (codeLength - 8);

        Marshal.Copy(BitConverter.GetBytes((long)nBytes), 0, countSlot, 4);
        Marshal.Copy(BitConverter.GetBytes((long)dst), 0, dstSlot, 4);
        Marshal.Copy(BitConverter.GetBytes((long)src), 0, srcSlot, 4);

        Stopwatch sw = new Stopwatch();
        sw.Start();

        // Ten passes, measured together, to match the Marshal.Copy benchmark.
        for (int i = 0; i < 10; i++)
        {
            copyRoutine();
        }

        sw.Stop();

        System.Windows.Forms.MessageBox.Show(sw.ElapsedTicks.ToString());
    }
    catch (Exception ex)
    {
        // Demo behavior kept from the original: report the failure to the user.
        System.Windows.Forms.MessageBox.Show(ex.Message);
    }
    finally
    {
        if (codeBase != IntPtr.Zero)
        {
            // MEM_RELEASE requires the allocation base address and a size of 0.
            // The result is intentionally not turned into an exception inside finally.
            NativeMethods.VirtualFree(codeBase, 0, FreeTypes.Release);
        }

        GC.Collect(GC.MaxGeneration, GCCollectionMode.Forced);
    }
}
汇编代码被表示成带注释的字节数组:
-
// 32-bit x86 machine code for the copy routine, one instruction per line, followed by
// 24 bytes of parameter storage. The left-hand hex number in each comment is the byte
// offset of the instruction from the start of the array.
//
// Parameter slots, addressed by the code relative to ebp:
//   +0x15A source address, +0x162 destination address, +0x16A byte count.
//
// NOTE(review): the routine does `xchg ebp, eax` and then reads its slots as [ebp + offset],
// so it appears to rely on eax holding the routine's own start address on entry.
// Confirm this against how the delegate thunk performs the call.
private static byte[] x86_FastMemCopy_New =
{
    0x90,                                     // 000: nop
    0x60,                                     // 001: pushad
    0x95,                                     // 002: xchg ebp, eax
    0x8B, 0xB5, 0x5A, 0x01, 0x00, 0x00,       // 003: mov esi, [ebp+0x15A]      ; source
    0x89, 0xF0,                               // 009: mov eax, esi
    0x83, 0xE0, 0x0F,                         // 00B: and eax, 0x0F             ; source offset within 16 bytes
    0x8B, 0xBD, 0x62, 0x01, 0x00, 0x00,       // 00E: mov edi, [ebp+0x162]      ; destination
    0x89, 0xFB,                               // 014: mov ebx, edi
    0x83, 0xE3, 0x0F,                         // 016: and ebx, 0x0F             ; destination offset within 16 bytes
    0x8B, 0x8D, 0x6A, 0x01, 0x00, 0x00,       // 019: mov ecx, [ebp+0x16A]      ; byte count
    0xC1, 0xE9, 0x07,                         // 01F: shr ecx, 7                ; number of 128-byte blocks
    0x85, 0xC9,                               // 022: test ecx, ecx
    0x0F, 0x84, 0x1C, 0x01, 0x00, 0x00,       // 024: jz 0x146                  ; no full block -> tail
    0x0F, 0x18, 0x06,                         // 02A: prefetchnta [esi]
    0x85, 0xC0,                               // 02D: test eax, eax
    0x0F, 0x84, 0x8B, 0x00, 0x00, 0x00,       // 02F: jz 0x0C0                  ; source 16-byte aligned -> movaps loop

    // ---- loop A (0x035): source not 16-byte aligned, loads use movups ----
    0x0F, 0x18, 0x86, 0x80, 0x02, 0x00, 0x00, // 035: prefetchnta [esi+0x280]
    0x0F, 0x10, 0x06,                         // 03C: movups xmm0, [esi]
    0x0F, 0x10, 0x4E, 0x10,                   // 03F: movups xmm1, [esi+0x10]
    0x0F, 0x10, 0x56, 0x20,                   // 043: movups xmm2, [esi+0x20]
    0x0F, 0x18, 0x86, 0xC0, 0x02, 0x00, 0x00, // 047: prefetchnta [esi+0x2C0]
    0x0F, 0x10, 0x5E, 0x30,                   // 04E: movups xmm3, [esi+0x30]
    0x0F, 0x10, 0x66, 0x40,                   // 052: movups xmm4, [esi+0x40]
    0x0F, 0x10, 0x6E, 0x50,                   // 056: movups xmm5, [esi+0x50]
    0x0F, 0x10, 0x76, 0x60,                   // 05A: movups xmm6, [esi+0x60]
    0x0F, 0x10, 0x7E, 0x70,                   // 05E: movups xmm7, [esi+0x70]
    0x85, 0xDB,                               // 062: test ebx, ebx
    0x74, 0x21,                               // 064: jz 0x087                  ; destination aligned -> movntps stores
    0x0F, 0x11, 0x07,                         // 066: movups [edi], xmm0
    0x0F, 0x11, 0x4F, 0x10,                   // 069: movups [edi+0x10], xmm1
    0x0F, 0x11, 0x57, 0x20,                   // 06D: movups [edi+0x20], xmm2
    0x0F, 0x11, 0x5F, 0x30,                   // 071: movups [edi+0x30], xmm3
    0x0F, 0x11, 0x67, 0x40,                   // 075: movups [edi+0x40], xmm4
    0x0F, 0x11, 0x6F, 0x50,                   // 079: movups [edi+0x50], xmm5
    0x0F, 0x11, 0x77, 0x60,                   // 07D: movups [edi+0x60], xmm6
    0x0F, 0x11, 0x7F, 0x70,                   // 081: movups [edi+0x70], xmm7
    0xEB, 0x1F,                               // 085: jmp 0x0A6
    0x0F, 0x2B, 0x07,                         // 087: movntps [edi], xmm0
    0x0F, 0x2B, 0x4F, 0x10,                   // 08A: movntps [edi+0x10], xmm1
    0x0F, 0x2B, 0x57, 0x20,                   // 08E: movntps [edi+0x20], xmm2
    0x0F, 0x2B, 0x5F, 0x30,                   // 092: movntps [edi+0x30], xmm3
    0x0F, 0x2B, 0x67, 0x40,                   // 096: movntps [edi+0x40], xmm4
    0x0F, 0x2B, 0x6F, 0x50,                   // 09A: movntps [edi+0x50], xmm5
    0x0F, 0x2B, 0x77, 0x60,                   // 09E: movntps [edi+0x60], xmm6
    0x0F, 0x2B, 0x7F, 0x70,                   // 0A2: movntps [edi+0x70], xmm7
    0x81, 0xC6, 0x80, 0x00, 0x00, 0x00,       // 0A6: add esi, 0x80
    0x81, 0xC7, 0x80, 0x00, 0x00, 0x00,       // 0AC: add edi, 0x80
    0x83, 0xE9, 0x01,                         // 0B2: sub ecx, 1
    0x0F, 0x85, 0x7A, 0xFF, 0xFF, 0xFF,       // 0B5: jnz 0x035                 ; next block of loop A
    0xE9, 0x86, 0x00, 0x00, 0x00,             // 0BB: jmp 0x146                 ; -> tail

    // ---- loop B (0x0C0): source 16-byte aligned, loads use movaps ----
    0x0F, 0x18, 0x86, 0x80, 0x02, 0x00, 0x00, // 0C0: prefetchnta [esi+0x280]
    0x0F, 0x28, 0x06,                         // 0C7: movaps xmm0, [esi]
    0x0F, 0x28, 0x4E, 0x10,                   // 0CA: movaps xmm1, [esi+0x10]
    0x0F, 0x28, 0x56, 0x20,                   // 0CE: movaps xmm2, [esi+0x20]
    0x0F, 0x18, 0x86, 0xC0, 0x02, 0x00, 0x00, // 0D2: prefetchnta [esi+0x2C0]
    0x0F, 0x28, 0x5E, 0x30,                   // 0D9: movaps xmm3, [esi+0x30]
    0x0F, 0x28, 0x66, 0x40,                   // 0DD: movaps xmm4, [esi+0x40]
    0x0F, 0x28, 0x6E, 0x50,                   // 0E1: movaps xmm5, [esi+0x50]
    0x0F, 0x28, 0x76, 0x60,                   // 0E5: movaps xmm6, [esi+0x60]
    0x0F, 0x28, 0x7E, 0x70,                   // 0E9: movaps xmm7, [esi+0x70]
    0x85, 0xDB,                               // 0ED: test ebx, ebx
    0x74, 0x21,                               // 0EF: jz 0x112                  ; destination aligned -> movntps stores
    0x0F, 0x11, 0x07,                         // 0F1: movups [edi], xmm0
    0x0F, 0x11, 0x4F, 0x10,                   // 0F4: movups [edi+0x10], xmm1
    0x0F, 0x11, 0x57, 0x20,                   // 0F8: movups [edi+0x20], xmm2
    0x0F, 0x11, 0x5F, 0x30,                   // 0FC: movups [edi+0x30], xmm3
    0x0F, 0x11, 0x67, 0x40,                   // 100: movups [edi+0x40], xmm4
    0x0F, 0x11, 0x6F, 0x50,                   // 104: movups [edi+0x50], xmm5
    0x0F, 0x11, 0x77, 0x60,                   // 108: movups [edi+0x60], xmm6
    0x0F, 0x11, 0x7F, 0x70,                   // 10C: movups [edi+0x70], xmm7
    0xEB, 0x1F,                               // 110: jmp 0x131
    0x0F, 0x2B, 0x07,                         // 112: movntps [edi], xmm0
    0x0F, 0x2B, 0x4F, 0x10,                   // 115: movntps [edi+0x10], xmm1
    0x0F, 0x2B, 0x57, 0x20,                   // 119: movntps [edi+0x20], xmm2
    0x0F, 0x2B, 0x5F, 0x30,                   // 11D: movntps [edi+0x30], xmm3
    0x0F, 0x2B, 0x67, 0x40,                   // 121: movntps [edi+0x40], xmm4
    0x0F, 0x2B, 0x6F, 0x50,                   // 125: movntps [edi+0x50], xmm5
    0x0F, 0x2B, 0x77, 0x60,                   // 129: movntps [edi+0x60], xmm6
    0x0F, 0x2B, 0x7F, 0x70,                   // 12D: movntps [edi+0x70], xmm7
    0x81, 0xC6, 0x80, 0x00, 0x00, 0x00,       // 131: add esi, 0x80
    0x81, 0xC7, 0x80, 0x00, 0x00, 0x00,       // 137: add edi, 0x80
    0x83, 0xE9, 0x01,                         // 13D: sub ecx, 1
    0x0F, 0x85, 0x7A, 0xFF, 0xFF, 0xFF,       // 140: jnz 0x0C0                 ; next block of loop B

    // ---- tail (0x146): remaining (count mod 128) bytes, then return ----
    0x8B, 0x8D, 0x6A, 0x01, 0x00, 0x00,       // 146: mov ecx, [ebp+0x16A]      ; byte count again
    0x83, 0xE1, 0x7F,                         // 14C: and ecx, 0x7F
    0x85, 0xC9,                               // 14F: test ecx, ecx
    0x74, 0x02,                               // 151: jz 0x155
    0xF3, 0xA4,                               // 153: rep movsb
    0x0F, 0xAE, 0xF8,                         // 155: sfence
    0x61,                                     // 158: popad
    0xC3,                                     // 159: ret

    // ---- parameter slots (8 bytes each), patched after the array is copied ----
    0x00, 0x00, 0x00, 0x00,                   // 15A: source address
    0x00, 0x00, 0x00, 0x00,

    0x00, 0x00, 0x00, 0x00,                   // 162: destination address
    0x00, 0x00, 0x00, 0x00,

    0x00, 0x00, 0x00, 0x00,                   // 16A: byte count
    0x00, 0x00, 0x00, 0x00
};
(文/开源中国社区编译)