JS 的 for ... in 真的很慢

LeetCode 204 是一道线性筛题目，时间复杂度 $O(n)$ ，空间复杂度 $O(n)$ 。

原则上来说，按照线性筛写法，应该秒出（大部分语言耗时都在 100ms 内）

但是同样的代码在 JavaScript 和 TypeScript 中却超时了，而将其中的for (var j in prime)换成for (var j = 0; j < prime.length; ++j)却在 136ms 完成了运算。

简单来说，for ... in实际上是一个生成器，通过 yield 操作返回数据。相对于直接整数累加，这部分操作更为复杂（需要先构建一个对象，而后不断调用next()。但是尽管在不同的解释器中存在性能差异，该部分应该仅仅只是一个常数差异，在实际运算中应该性能差别并不大。

查看下面的代码

> start=new Date();s=Array(1000000).fill(1);for (j in s) s[j] = 1;console.log(new Date()-start)
523

> start=new Date();s=Array(1000000).fill(1);for (j=0;j<s.length;++j) s[j] = 1;console.log(new Date()-start)
22

可以看到，在数组个数为 $1000000$ 时，时间消耗已经超过 20 倍，而当数组长度扩大 $10$ 倍，前者变为 $10334$ ，后者为 $215$ 。
很显然，for ... in并非 $O(n)$ 的时间复杂度，范围扩大 $10$ 倍，耗时扩大了 $20$ 倍。

通过查阅文档， for ... in实际上更多被设计用于枚举Object，其会无序地返回迭代器。尽管对于数组而言，这里通常是有序的，但是由于仍然需要进行完整的判断，因此这里需要进行很多并不必要的操作（维护已返回的迭代器集合、检查迭代器能否枚举），这些操作将会拖慢整体的运算速度。如额外维护的 Set 结构，实际上就会花费 $O(nlog(n))$ 的时间，而实际上对于数组并不需要维护该值。

下面是相关获取属性的实现

function* EnumerateObjectProperties(obj) {
  const visited = new Set();
  for (const key of Reflect.ownKeys(obj)) {
    if (typeof key === "symbol") continue;
    const desc = Reflect.getOwnPropertyDescriptor(obj, key);
    if (desc) {
      visited.add(key);
      if (desc.enumerable) yield key;
    }
  }
  const proto = Reflect.getPrototypeOf(obj);
  if (proto === null) return;
  for (const protoKey of EnumerateObjectProperties(proto)) {
    if (!visited.has(protoKey)) yield protoKey;
  }
}

下面是 v8 中该部分的实现

// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/runtime/runtime-utils.h"

#include "src/arguments-inl.h"
#include "src/counters.h"
#include "src/elements.h"
#include "src/heap/factory.h"
#include "src/heap/heap-inl.h"  // For ToBoolean. TODO(jkummerow): Drop.
#include "src/isolate-inl.h"
#include "src/keys.h"
#include "src/objects-inl.h"
#include "src/objects/module.h"

namespace v8 {
namespace internal {

namespace {

// Returns either a FixedArray or, if the given {receiver} has an enum cache
// that contains all enumerable properties of the {receiver} and its prototypes
// have none, the map of the {receiver}. This is used to speed up the check for
// deletions during a for-in.
MaybeHandle<HeapObject> Enumerate(Isolate* isolate,
                                  Handle<JSReceiver> receiver) {
  JSObject::MakePrototypesFast(receiver, kStartAtReceiver, isolate);
  FastKeyAccumulator accumulator(isolate, receiver,
                                 KeyCollectionMode::kIncludePrototypes,
                                 ENUMERABLE_STRINGS, true);
  // Test if we have an enum cache for {receiver}.
  if (!accumulator.is_receiver_simple_enum()) {
    Handle<FixedArray> keys;
    ASSIGN_RETURN_ON_EXCEPTION(
        isolate, keys, accumulator.GetKeys(GetKeysConversion::kConvertToString),
        HeapObject);
    // Test again, since cache may have been built by GetKeys() calls above.
    if (!accumulator.is_receiver_simple_enum()) return keys;
  }
  DCHECK(!receiver->IsJSModuleNamespace());
  return handle(receiver->map(), isolate);
}

// This is a slight modifcation of JSReceiver::HasProperty, dealing with
// the oddities of JSProxy and JSModuleNamespace in for-in filter.
MaybeHandle<Object> HasEnumerableProperty(Isolate* isolate,
                                          Handle<JSReceiver> receiver,
                                          Handle<Object> key) {
  bool success = false;
  Maybe<PropertyAttributes> result = Just(ABSENT);
  LookupIterator it =
      LookupIterator::PropertyOrElement(isolate, receiver, key, &success);
  if (!success) return isolate->factory()->undefined_value();
  for (; it.IsFound(); it.Next()) {
    switch (it.state()) {
      case LookupIterator::NOT_FOUND:
      case LookupIterator::TRANSITION:
        UNREACHABLE();
      case LookupIterator::JSPROXY: {
        // For proxies we have to invoke the [[GetOwnProperty]] trap.
        result = JSProxy::GetPropertyAttributes(&it);
        if (result.IsNothing()) return MaybeHandle<Object>();
        if (result.FromJust() == ABSENT) {
          // Continue lookup on the proxy's prototype.
          Handle<JSProxy> proxy = it.GetHolder<JSProxy>();
          Handle<Object> prototype;
          ASSIGN_RETURN_ON_EXCEPTION(isolate, prototype,
                                     JSProxy::GetPrototype(proxy), Object);
          if (prototype->IsNull(isolate)) {
            return isolate->factory()->undefined_value();
          }
          // We already have a stack-check in JSProxy::GetPrototype.
          return HasEnumerableProperty(
              isolate, Handle<JSReceiver>::cast(prototype), key);
        } else if (result.FromJust() & DONT_ENUM) {
          return isolate->factory()->undefined_value();
        } else {
          return it.GetName();
        }
      }
      case LookupIterator::INTERCEPTOR: {
        result = JSObject::GetPropertyAttributesWithInterceptor(&it);
        if (result.IsNothing()) return MaybeHandle<Object>();
        if (result.FromJust() != ABSENT) return it.GetName();
        continue;
      }
      case LookupIterator::ACCESS_CHECK: {
        if (it.HasAccess()) continue;
        result = JSObject::GetPropertyAttributesWithFailedAccessCheck(&it);
        if (result.IsNothing()) return MaybeHandle<Object>();
        if (result.FromJust() != ABSENT) return it.GetName();
        return isolate->factory()->undefined_value();
      }
      case LookupIterator::INTEGER_INDEXED_EXOTIC:
        // TypedArray out-of-bounds access.
        return isolate->factory()->undefined_value();
      case LookupIterator::ACCESSOR: {
        if (it.GetHolder<Object>()->IsJSModuleNamespace()) {
          result = JSModuleNamespace::GetPropertyAttributes(&it);
          if (result.IsNothing()) return MaybeHandle<Object>();
          DCHECK_EQ(0, result.FromJust() & DONT_ENUM);
        }
        return it.GetName();
      }
      case LookupIterator::DATA:
        return it.GetName();
    }
  }
  return isolate->factory()->undefined_value();
}

}  // namespace


RUNTIME_FUNCTION(Runtime_ForInEnumerate) {
  HandleScope scope(isolate);
  DCHECK_EQ(1, args.length());
  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, receiver, 0);
  RETURN_RESULT_OR_FAILURE(isolate, Enumerate(isolate, receiver));
}


RUNTIME_FUNCTION(Runtime_ForInHasProperty) {
  HandleScope scope(isolate);
  DCHECK_EQ(2, args.length());
  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, receiver, 0);
  CONVERT_ARG_HANDLE_CHECKED(Object, key, 1);
  Handle<Object> result;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, result, HasEnumerableProperty(isolate, receiver, key));
  return isolate->heap()->ToBoolean(!result->IsUndefined(isolate));
}

}  // namespace internal
}  // namespace v8

如果真正需要枚举数组，应该使用传统的for(;;)或是for ... of。后者可以直接获取元素而非下标，其时间效率也在可接受的范围（相对于for(;;)稍慢，但仍然为 $O(n)$ 复杂度）

各语言性能比较

另外附上各种语言的空间、时间消耗，在大多数题目中，Java 都能做到最快（大概没有计算 Java 虚拟机的启动时间），而 Go 往往可以做到空间最优，C/C++ 紧跟其后。

而与往常 Python 并不会拉开太大差距不同，这里 Python 时间消耗也很大，大概也是由于生成器本身耗时

各语言时间、空间消耗