luaL_ref performance in Lua 5.2.0-work4

classic Classic list List threaded Threaded
3 messages Options
Reply | Threaded
Open this post in threaded view
|

luaL_ref performance in Lua 5.2.0-work4

Joshua Jensen
  I was looking through the luaL_ref code, and I noticed the new
implementation uses a string as the look up key.  This is certainly
slower than a number look up, especially when used a lot.

I do a lot of data manipulation via Lua, so I was curious what the end
result was.

The following benchmarks are run on a Core i7 laptop.

An implementation of Lua 4-style refs is available in patch form below.  
I refer to them as fastrefs.

The LuaObject portion of the benchmark uses LuaPlus running against Lua 5.1.

Lua 5.1 - 1,000,000 luarefs - 3 loops: 250-281 milliseconds
Lua 5.2 - 1,000,000 luarefs - 3 loops: 530-546 milliseconds

Lua 5.1 - 1,000,000 fastrefs - 3 loops: 93-109 milliseconds
Lua 5.2 - 1,000,000 fastrefs - 3 loops: 125-141 milliseconds

Lua 5.1 - 1,000,000 LuaObjects - 3 loops: 125-140 milliseconds



Lua 5.1 - 1,000,000 luarefs - combined 1 loop: 250-265 milliseconds
Lua 5.2 - 1,000,000 luarefs - combined 1 loop: 499-546 milliseconds

Lua 5.1 - 1,000,000 fastrefs - combined 1 loop: 78-94 milliseconds
Lua 5.2 - 1,000,000 fastrefs - combined 1 loop: 109-125 milliseconds

Lua 5.1 - 1,000,000 LuaObjects - combined 1 loop: 78-94 milliseconds


Of note are the Lua 5.2 performance losses especially with regard to the
built in luaL_ref and company being twice as slow as their Lua 5.1
counterpart.  I tried changing the freelist string back to an integer
and using lua_raw[gs]eti.  There was a speed up, but it didn't bring Lua
5.2 back on par with Lua 5.1 numbers.

This is the first I've really looked at Lua 5.2.  Are there any thoughts
as to why the performance loss is so large?

Thanks!

Josh

(The benchmark code is below.)


 From 0f4ce5b7c970b5bba74d2674c449ddb9bdc7fe0f Mon Sep 17 00:00:00 2001
From: Joshua Jensen <[hidden email]>
Date: Mon, 2 Aug 2010 22:18:43 -0600
Subject: [PATCH] * Add lua_fastref support.

---
  src/lapi.c    |   79
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  src/lgc.c     |   15 +++++++++++
  src/lstate.c  |    6 ++++
  src/lstate.h  |   20 ++++++++++++++
  src/lua.h     |    9 ++++++
  src/luaconf.h |    3 ++
  6 files changed, 132 insertions(+), 0 deletions(-)

diff --git a/src/lapi.c b/src/lapi.c
index 2d7c336..a4e0bb4 100644
--- a/src/lapi.c
+++ b/src/lapi.c
@@ -51,6 +51,18 @@ static TValue *index2addr (lua_State *L, int idx) {
      api_check(L, idx != 0 && -idx <= L->top - (ci->func + 1), "invalid
index");
      return L->top + idx;
    }
+#if LUA_FASTREF
+  else if (idx <= LUA_FASTREFNIL) {
+    struct lua_Ref* refobj;
+    if (idx == LUA_FASTREFNIL)
+      return &G(L)->refNilValue;
+    idx = -idx + LUA_FASTREFNIL - 1;
+    refobj = &G(L)->refArray[idx];
+    if (0 <= idx && idx < G(L)->refSize && refobj->st == LUA_FASTREF_LOCK)
+      return &refobj->o;
+    return &G(L)->refNilValue;
+  }
+#endif /* LUA_FASTREF */
    else if (idx == LUA_REGISTRYINDEX)
      return &G(L)->l_registry;
    else {  /* upvalues */
@@ -1178,3 +1190,70 @@ LUA_API void lua_upvaluejoin (lua_State *L, int
fidx1, int n1,
    luaC_objbarrier(L, f1, *up2);
  }

+#if LUA_FASTREF
+
+LUA_API int lua_getfastref (lua_State *L, int ref) {
+  struct lua_Ref* refobj;
+  if (ref == LUA_FASTREFNIL) {
+    ttype(L->top) = LUA_TNIL;
+    api_incr_top(L);
+    return 1;
+  }
+  ref = -ref + LUA_FASTREFNIL - 1;
+  refobj = &G(L)->refArray[ref];
+  if (0 <= ref && ref < G(L)->refSize && refobj->st == LUA_FASTREF_LOCK)
+    *L->top = refobj->o;
+  else
+    return 0;
+  api_incr_top(L);
+  return 1;
+}
+
+
+LUA_API int lua_fastrefindex (lua_State *L, int idx) {
+  int ref;
+  TValue* value = index2addr(L, idx);
+  if (ttype(value) == LUA_TNIL)
+    return LUA_FASTREFNIL;
+  else {
+    global_State *g = G(L);
+    struct lua_Ref* refobj;
+    if (g->refFree == LUA_FASTREF_NONEXT) {  /* is there a free place? */
+      int origsize = g->refSize;
+      int i;
+      luaM_growvector(L, g->refArray, g->refSize, g->refSize, struct
lua_Ref,
+                      MAX_INT, "reference table overflow");
+      for (i = g->refSize - 1; i >= origsize; --i) {
+        g->refArray[i].st = g->refFree;
+        g->refFree = i;
+      }
+      value = index2addr(L, idx);
+    }
+    ref = g->refFree;
+    refobj = &g->refArray[ref];
+    g->refFree = refobj->st;
+    refobj->o = *value;
+    refobj->st = LUA_FASTREF_LOCK;
+  }
+  return LUA_FASTREFNIL - 1 - ref;
+}
+
+
+LUA_API int lua_fastref (lua_State *L) {
+  int ref = lua_fastrefindex(L, -1);
+  lua_pop(L, 1);
+  return ref;
+}
+
+
+LUA_API void lua_fastunref (lua_State *L, int ref) {
+  ref = -ref + LUA_FASTREFNIL - 1;
+  if (ref >= 0) {
+    global_State* g = G(L);
+    luai_apicheck(L, ref < g->refSize && g->refArray[ref].st < 0); //,
"invalid ref");
+    g->refArray[ref].st = g->refFree;
+    g->refFree = ref;
+  }
+}
+
+#endif /* LUA_FASTREF */
diff --git a/src/lgc.c b/src/lgc.c
index fcaca1b..16b25bf 100644
--- a/src/lgc.c
+++ b/src/lgc.c
@@ -847,6 +847,18 @@ void luaC_freeallobjects (lua_State *L) {
  }


+#if LUA_FASTREF
+static void marklock (global_State *g)
+{
+  int i;
+  for (i=0; i<g->refSize; i++) {
+    if (g->refArray[i].st == LUA_FASTREF_LOCK)
+      markvalue(g, &g->refArray[i].o);
+  }
+}
+#endif /* LUA_FASTREF */
+
+
  static void atomic (lua_State *L) {
    global_State *g = G(L);
    lua_assert(!iswhite(obj2gco(g->mainthread)));
@@ -854,6 +866,9 @@ static void atomic (lua_State *L) {
    /* registry and global metatables may be changed by API */
    markvalue(g, &g->l_registry);
    markmt(g);  /* mark basic metatables */
+#if LUA_FASTREF
+  marklock(g); /* mark locked objects */
+#endif /* LUA_FASTREF */
    /* remark occasional upvalues of (maybe) dead threads */
    remarkupvals(g);
    /* traverse objects caught by write barrier and by 'remarkupvals' */
diff --git a/src/lstate.c b/src/lstate.c
index 6ea00b6..d7d31c5 100644
--- a/src/lstate.c
+++ b/src/lstate.c
@@ -255,6 +255,12 @@ LUA_API lua_State *lua_newstate (lua_Alloc f, void
*ud) {
    g->totalbytes = sizeof(LG);
    g->gcpause = LUAI_GCPAUSE;
    g->gcstepmul = LUAI_GCMUL;
+#if LUA_FASTREF
+  g->refArray = NULL;
+  g->refSize = 0;
+  g->refFree = LUA_FASTREF_NONEXT;
+  setnilvalue(&g->refNilValue);
+#endif /* LUA_FASTREF */
    for (i=0; i < LUA_NUMTAGS; i++) g->mt[i] = NULL;
    if (luaD_rawrunprotected(L, f_luaopen, NULL) != LUA_OK) {
      /* memory allocation error: free partial state */
diff --git a/src/lstate.h b/src/lstate.h
index e829ed4..9dd6188 100644
--- a/src/lstate.h
+++ b/src/lstate.h
@@ -39,6 +39,20 @@
  */


+/*
+** marks for Reference array
+*/
+#define LUA_FASTREF_NONEXT          -1      /* to end the free list */
+#define LUA_FASTREF_LOCK            -4
+
+
+struct lua_Ref {
+  TValue o;
+  int st;  /* can be LUA_FASTREF_LOCK, LUA_FASTREF_NONEXT, or next (for
free list) */
+};
+
+
+
  struct lua_longjmp;  /* defined in ldo.c */


@@ -143,6 +157,12 @@ typedef struct global_State {
    TString *memerrmsg;  /* memory-error message */
    TString *tmname[TM_N];  /* array with tag-method names */
    struct Table *mt[LUA_NUMTAGS];  /* metatables for basic types */
+#if LUA_FASTREF
+  struct lua_Ref *refArray;  /* locked objects */
+  int refSize;  /* size of refArray */
+  int refFree;  /* list of free positions in refArray */
+  TValue refNilValue;
+#endif /* LUA_FASTREF */
  } global_State;


diff --git a/src/lua.h b/src/lua.h
index d1cf33a..4c501bb 100644
--- a/src/lua.h
+++ b/src/lua.h
@@ -406,6 +406,15 @@ struct lua_Debug {

  /*
}====================================================================== */

+#if LUA_FASTREF
+/* pre-defined references */
+#define LUA_FASTREFNIL    (-1999999)
+
+LUA_API int lua_getfastref (lua_State *L, int ref);
+LUA_API int lua_fastrefindex (lua_State *L, int idx);
+LUA_API int lua_fastref (lua_State *L);
+LUA_API void lua_fastunref (lua_State *L, int ref);
+#endif /* LUA_FASTREF */

  /******************************************************************************
  * Copyright (C) 1994-2010 Lua.org, PUC-Rio.  All rights reserved.
diff --git a/src/luaconf.h b/src/luaconf.h
index 12ec5fc..3559961 100644
--- a/src/luaconf.h
+++ b/src/luaconf.h
@@ -11,6 +11,9 @@
  #include <limits.h>
  #include <stddef.h>

+#if !defined(LUA_FASTREF)
+#define LUA_FASTREF 1
+#endif /* LUA_FASTREF */

  /*
  ** ==================================================================
--
1.7.2.msysgit.1


------------------------------------------------------------------------------------------------------------------------

extern "C"
{
#include "lua.h"
#include "lauxlib.h"
#include "lualib.h"
}

static int pmain(lua_State *L)
{
     luaL_openlibs(L);
     return 0;
}

void RefTest()
{
     lua_State *L = luaL_newstate();
     lua_pushcfunction(L, &pmain);
     lua_pcall(L, 0, 0, 0);

     for (int loop = 0; loop < 3; ++loop)
     {
         int* refs = new int[1000000];
         DWORD time = GetTickCount();
         for (int index = 1; index < 1000000; ++index)
         {
             lua_getglobal(L, "table");
             refs[index - 1] = lua_fastref(L);
         }
         for (int index = 1; index < 1000000; ++index)
         {
             lua_type(L, refs[index - 1]);
         }
         for (int index = 1; index < 1000000; ++index)
         {
             lua_fastunref(L, refs[index - 1]);
         }
         delete[] refs;
         time = GetTickCount() - time;
         printf("fastref(%d): %d\n", loop, time);
     }

     for (int loop = 0; loop < 3; ++loop)
     {
         int* refs = new int[1000000];
         DWORD time = GetTickCount();
         for (int index = 1; index < 1000000; ++index)
         {
             lua_getglobal(L, "table");
             refs[index - 1] = luaL_ref(L, LUA_REGISTRYINDEX);
         }
         for (int index = 1; index < 1000000; ++index)
         {
             lua_rawgeti(L, LUA_REGISTRYINDEX, refs[index - 1]);
             lua_type(L, -1);
             lua_pop(L, 1);
         }
         for (int index = 1; index < 1000000; ++index)
         {
             luaL_unref(L, LUA_REGISTRYINDEX, refs[index - 1]);
         }
         delete[] refs;
         time = GetTickCount() - time;
         printf("luaref(%d): %d\n", loop, time);
     }

     for (int loop = 0; loop < 3; ++loop)
     {
         DWORD time = GetTickCount();
         for (int index = 1; index < 1000000; ++index)
         {
             int ref;
             lua_getglobal(L, "table");
             ref = lua_fastref(L);
             lua_type(L, ref);
             lua_fastunref(L, ref);
         }
         time = GetTickCount() - time;
         printf("fastref-oneloop(%d): %d\n", loop, time);
     }

     for (int loop = 0; loop < 3; ++loop)
     {
         DWORD time = GetTickCount();
         for (int index = 1; index < 1000000; ++index)
         {
             int ref;
             lua_getglobal(L, "table");
             ref = luaL_ref(L, LUA_REGISTRYINDEX);

             lua_rawgeti(L, LUA_REGISTRYINDEX, ref);
             lua_type(L, -1);
             lua_pop(L, 1);

             luaL_unref(L, LUA_REGISTRYINDEX, ref);
         }
         time = GetTickCount() - time;
         printf("luaref-oneloop(%d): %d\n", loop, time);
     }
}



Reply | Threaded
Open this post in threaded view
|

Re: luaL_ref performance in Lua 5.2.0-work4

Roberto Ierusalimschy
> An implementation of Lua 4-style refs is available in patch form
> below.  I refer to them as fastrefs.

What is "Lua 4-style refs"?


> This is the first I've really looked at Lua 5.2.  Are there any
> thoughts as to why the performance loss is so large?

No. Are both versions compiled with the same options and compiler version?

-- Roberto
Reply | Threaded
Open this post in threaded view
|

Re: luaL_ref performance in Lua 5.2.0-work4

Joshua Jensen
  ----- Original Message -----
From: Roberto Ierusalimschy
Date: 8/3/2010 6:58 AM
>> An implementation of Lua 4-style refs is available in patch form
>> below.  I refer to them as fastrefs.
> What is "Lua 4-style refs"?
In Lua 4.0.1, lua_ref()/lua_getref()/lua_unref() are all written around
a very fast C-based architecture for handling the refs.  In Lua 5, this
was generalized into actual Lua data structures, and there was a
performance hit for doing so.  In Lua 5.2, some additional slowdown has
occurred, part of which is an even slower string based look up.

I refer to Lua 4-style refs (or fastrefs, as I called them in the
original email) as being a movement of the implementation from Lua 4
into Lua 5.  I have made some additional minor updates to it, but the
fastref implementation is largely derived from Lua 4.
>> This is the first I've really looked at Lua 5.2.  Are there any
>> thoughts as to why the performance loss is so large?
> No. Are both versions compiled with the same options and compiler version?
Yes.  The same options and compiler version were used for the benchmark.

I'll have to trace code to see what might be going on.  Perhaps the
garbage collector is kicking in and skewing the results further.

Josh