JS 的 for ... in 真的很慢

LeetCode 204 是一道线性筛题目,时间复杂度 O(n)O(n),空间复杂度 O(n)O(n)

原则上来说,按照线性筛写法,应该秒出(大部分语言耗时都在 100ms 内)

但是同样的代码在 JavaScript 和 TypeScript 中却超时了,而将其中的for (var j in prime)换成for (var j = 0; j < prime.length; ++j)却在 136ms 完成了运算。

简单来说,for ... in实际上是一个生成器,通过 yield 操作返回数据。相对于直接整数累加,这部分操作更为复杂(需要先构建一个对象,而后不断调用next()。但是尽管在不同的解释器中存在性能差异,该部分应该仅仅只是一个常数差异,在实际运算中应该性能差别并不大。

查看下面的代码

> start=new Date();s=Array(1000000).fill(1);for (j in s) s[j] = 1;console.log(new Date()-start)
523

> start=new Date();s=Array(1000000).fill(1);for (j=0;j<s.length;++j) s[j] = 1;console.log(new Date()-start)
22

可以看到,在数组个数为 10000001000000 时,时间消耗已经超过 20 倍,而当数组长度扩大 1010 倍,前者变为 1033410334,后者为 215215
很显然,for ... in并非 O(n)O(n) 的时间复杂度,范围扩大 1010 倍,耗时扩大了 2020 倍。

通过查阅文档, for ... in实际上更多被设计用于枚举Object,其会无序地返回迭代器。尽管对于数组而言,这里通常是有序的,但是由于仍然需要进行完整的判断,因此这里需要进行很多并不必要的操作(维护已返回的迭代器集合、检查迭代器能否枚举),这些操作将会拖慢整体的运算速度。如额外维护的 Set 结构,实际上就会花费 O(nlog(n))O(nlog(n)) 的时间,而实际上对于数组并不需要维护该值。

下面是相关获取属性的实现

function* EnumerateObjectProperties(obj) {
  const visited = new Set();
  for (const key of Reflect.ownKeys(obj)) {
    if (typeof key === "symbol") continue;
    const desc = Reflect.getOwnPropertyDescriptor(obj, key);
    if (desc) {
      visited.add(key);
      if (desc.enumerable) yield key;
    }
  }
  const proto = Reflect.getPrototypeOf(obj);
  if (proto === null) return;
  for (const protoKey of EnumerateObjectProperties(proto)) {
    if (!visited.has(protoKey)) yield protoKey;
  }
}

下面是 v8 中该部分的实现

// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/runtime/runtime-utils.h"

#include "src/arguments-inl.h"
#include "src/counters.h"
#include "src/elements.h"
#include "src/heap/factory.h"
#include "src/heap/heap-inl.h"  // For ToBoolean. TODO(jkummerow): Drop.
#include "src/isolate-inl.h"
#include "src/keys.h"
#include "src/objects-inl.h"
#include "src/objects/module.h"

namespace v8 {
namespace internal {

namespace {

// Returns either a FixedArray or, if the given {receiver} has an enum cache
// that contains all enumerable properties of the {receiver} and its prototypes
// have none, the map of the {receiver}. This is used to speed up the check for
// deletions during a for-in.
MaybeHandle<HeapObject> Enumerate(Isolate* isolate,
                                  Handle<JSReceiver> receiver) {
  JSObject::MakePrototypesFast(receiver, kStartAtReceiver, isolate);
  FastKeyAccumulator accumulator(isolate, receiver,
                                 KeyCollectionMode::kIncludePrototypes,
                                 ENUMERABLE_STRINGS, true);
  // Test if we have an enum cache for {receiver}.
  if (!accumulator.is_receiver_simple_enum()) {
    Handle<FixedArray> keys;
    ASSIGN_RETURN_ON_EXCEPTION(
        isolate, keys, accumulator.GetKeys(GetKeysConversion::kConvertToString),
        HeapObject);
    // Test again, since cache may have been built by GetKeys() calls above.
    if (!accumulator.is_receiver_simple_enum()) return keys;
  }
  DCHECK(!receiver->IsJSModuleNamespace());
  return handle(receiver->map(), isolate);
}

// This is a slight modifcation of JSReceiver::HasProperty, dealing with
// the oddities of JSProxy and JSModuleNamespace in for-in filter.
MaybeHandle<Object> HasEnumerableProperty(Isolate* isolate,
                                          Handle<JSReceiver> receiver,
                                          Handle<Object> key) {
  bool success = false;
  Maybe<PropertyAttributes> result = Just(ABSENT);
  LookupIterator it =
      LookupIterator::PropertyOrElement(isolate, receiver, key, &success);
  if (!success) return isolate->factory()->undefined_value();
  for (; it.IsFound(); it.Next()) {
    switch (it.state()) {
      case LookupIterator::NOT_FOUND:
      case LookupIterator::TRANSITION:
        UNREACHABLE();
      case LookupIterator::JSPROXY: {
        // For proxies we have to invoke the [[GetOwnProperty]] trap.
        result = JSProxy::GetPropertyAttributes(&it);
        if (result.IsNothing()) return MaybeHandle<Object>();
        if (result.FromJust() == ABSENT) {
          // Continue lookup on the proxy's prototype.
          Handle<JSProxy> proxy = it.GetHolder<JSProxy>();
          Handle<Object> prototype;
          ASSIGN_RETURN_ON_EXCEPTION(isolate, prototype,
                                     JSProxy::GetPrototype(proxy), Object);
          if (prototype->IsNull(isolate)) {
            return isolate->factory()->undefined_value();
          }
          // We already have a stack-check in JSProxy::GetPrototype.
          return HasEnumerableProperty(
              isolate, Handle<JSReceiver>::cast(prototype), key);
        } else if (result.FromJust() & DONT_ENUM) {
          return isolate->factory()->undefined_value();
        } else {
          return it.GetName();
        }
      }
      case LookupIterator::INTERCEPTOR: {
        result = JSObject::GetPropertyAttributesWithInterceptor(&it);
        if (result.IsNothing()) return MaybeHandle<Object>();
        if (result.FromJust() != ABSENT) return it.GetName();
        continue;
      }
      case LookupIterator::ACCESS_CHECK: {
        if (it.HasAccess()) continue;
        result = JSObject::GetPropertyAttributesWithFailedAccessCheck(&it);
        if (result.IsNothing()) return MaybeHandle<Object>();
        if (result.FromJust() != ABSENT) return it.GetName();
        return isolate->factory()->undefined_value();
      }
      case LookupIterator::INTEGER_INDEXED_EXOTIC:
        // TypedArray out-of-bounds access.
        return isolate->factory()->undefined_value();
      case LookupIterator::ACCESSOR: {
        if (it.GetHolder<Object>()->IsJSModuleNamespace()) {
          result = JSModuleNamespace::GetPropertyAttributes(&it);
          if (result.IsNothing()) return MaybeHandle<Object>();
          DCHECK_EQ(0, result.FromJust() & DONT_ENUM);
        }
        return it.GetName();
      }
      case LookupIterator::DATA:
        return it.GetName();
    }
  }
  return isolate->factory()->undefined_value();
}

}  // namespace


RUNTIME_FUNCTION(Runtime_ForInEnumerate) {
  HandleScope scope(isolate);
  DCHECK_EQ(1, args.length());
  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, receiver, 0);
  RETURN_RESULT_OR_FAILURE(isolate, Enumerate(isolate, receiver));
}


RUNTIME_FUNCTION(Runtime_ForInHasProperty) {
  HandleScope scope(isolate);
  DCHECK_EQ(2, args.length());
  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, receiver, 0);
  CONVERT_ARG_HANDLE_CHECKED(Object, key, 1);
  Handle<Object> result;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, result, HasEnumerableProperty(isolate, receiver, key));
  return isolate->heap()->ToBoolean(!result->IsUndefined(isolate));
}

}  // namespace internal
}  // namespace v8

如果真正需要枚举数组,应该使用传统的for(;;)或是for ... of。后者可以直接获取元素而非下标,其时间效率也在可接受的范围(相对于for(;;)稍慢,但仍然为 O(n)O(n) 复杂度)

各语言性能比较

另外附上各种语言的空间、时间消耗,在大多数题目中,Java 都能做到最快(大概没有计算 Java 虚拟机的启动时间),而 Go 往往可以做到空间最优,C/C++ 紧跟其后。

而与往常 Python 并不会拉开太大差距不同,这里 Python 时间消耗也很大,大概也是由于生成器本身耗时

各语言时间、空间消耗各语言时间、空间消耗

参考资料