JS 的 for ... in 真的很慢
LeetCode 204 是一道线性筛题目,时间复杂度 $O(n)$,空间复杂度 $O(n)$。
原则上来说,按照线性筛写法,应该秒出(大部分语言耗时都在 100ms 内)
但是同样的代码在 JavaScript 和 TypeScript 中却超时了,而将其中的for (var j in prime)
换成for (var j = 0; j < prime.length; ++j)
却在 136ms 完成了运算。
简单来说,for ... in
实际上是一个生成器,通过 yield 操作返回数据。相对于直接整数累加,这部分操作更为复杂(需要先构建一个对象,而后不断调用next()
)。尽管在不同的解释器中存在性能差异,但这部分开销应该只是常数级的差异,在实际运算中性能差别应该并不大。
查看下面的代码
> start=new Date();s=Array(1000000).fill(1);for (j in s) s[j] = 1;console.log(new Date()-start) 523 > start=new Date();s=Array(1000000).fill(1);for (j=0;j<s.length;++j) s[j] = 1;console.log(new Date()-start) 22
可以看到,在数组个数为 $10^6$ 时,两者的时间消耗已经相差超过 20 倍,而当数组长度扩大 倍,前者变为 ,后者为 。
很显然,for ... in
并非 $O(n)$ 的时间复杂度,范围扩大 倍,耗时扩大了 倍。
通过查阅文档, for ... in
实际上更多被设计用于枚举Object
,其会无序地返回迭代器。尽管对于数组而言,这里通常是有序的,但是由于仍然需要进行完整的判断,因此这里需要进行很多并不必要的操作(维护已返回的迭代器集合、检查迭代器能否枚举),这些操作将会拖慢整体的运算速度。如额外维护的 Set 结构,实际上就会花费 的时间,而实际上对于数组并不需要维护该值。
下面是相关获取属性的实现
/**
 * Informative model of the key enumeration performed by `for ... in`.
 *
 * Yields the enumerable string keys of `obj` first, then those of each
 * object on its prototype chain, skipping any key that a closer object
 * already defines (whether or not that closer definition is enumerable).
 *
 * @param {object} obj Object whose keys are enumerated.
 * @yields {string} Each enumerable, non-shadowed string key.
 */
function* EnumerateObjectProperties(obj) {
  // Every own string key is recorded here, including non-enumerable ones,
  // so that it shadows a same-named key further up the prototype chain.
  const seen = new Set();

  for (const ownKey of Reflect.ownKeys(obj)) {
    // Symbol keys never take part in for-in enumeration.
    if (typeof ownKey !== "symbol") {
      const descriptor = Reflect.getOwnPropertyDescriptor(obj, ownKey);
      // The property may have disappeared since ownKeys() was called.
      if (!descriptor) continue;
      seen.add(ownKey);
      if (descriptor.enumerable) yield ownKey;
    }
  }

  const parent = Reflect.getPrototypeOf(obj);
  if (parent !== null) {
    // Recurse up the chain, filtering out keys shadowed by `obj` itself.
    for (const inheritedKey of EnumerateObjectProperties(parent)) {
      if (!seen.has(inheritedKey)) yield inheritedKey;
    }
  }
}
下面是 v8 中该部分的实现
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/runtime/runtime-utils.h"

#include "src/arguments-inl.h"
#include "src/counters.h"
#include "src/elements.h"
#include "src/heap/factory.h"
#include "src/heap/heap-inl.h"  // For ToBoolean. TODO(jkummerow): Drop.
#include "src/isolate-inl.h"
#include "src/keys.h"
#include "src/objects-inl.h"
#include "src/objects/module.h"

namespace v8 {
namespace internal {

namespace {

// Returns either a FixedArray or, if the given {receiver} has an enum cache
// that contains all enumerable properties of the {receiver} and its prototypes
// have none, the map of the {receiver}. This is used to speed up the check for
// deletions during a for-in.
MaybeHandle<HeapObject> Enumerate(Isolate* isolate,
                                  Handle<JSReceiver> receiver) {
  JSObject::MakePrototypesFast(receiver, kStartAtReceiver, isolate);
  // The accumulator is configured to include prototype keys and to collect
  // only ENUMERABLE_STRINGS.
  FastKeyAccumulator accumulator(isolate, receiver,
                                 KeyCollectionMode::kIncludePrototypes,
                                 ENUMERABLE_STRINGS, true);
  // Test if we have an enum cache for {receiver}.
  if (!accumulator.is_receiver_simple_enum()) {
    Handle<FixedArray> keys;
    // Slow path: collect the keys explicitly, converted to strings.
    ASSIGN_RETURN_ON_EXCEPTION(
        isolate, keys,
        accumulator.GetKeys(GetKeysConversion::kConvertToString), HeapObject);
    // Test again, since cache may have been built by GetKeys() calls above.
    if (!accumulator.is_receiver_simple_enum()) return keys;
  }
  // Fast path: hand back the receiver's map instead of a key array.
  DCHECK(!receiver->IsJSModuleNamespace());
  return handle(receiver->map(), isolate);
}

// This is a slight modification of JSReceiver::HasProperty, dealing with
// the oddities of JSProxy and JSModuleNamespace in for-in filter.
//
// Walks the lookup for {key} starting at {receiver}. Returns the property
// name when the lookup ends on a property that passes the per-state checks
// below, the undefined value when it does not, and an empty MaybeHandle when
// an attribute query yields Nothing (presumably a pending exception --
// NOTE(review): confirm against the Maybe<> conventions used by the callees).
MaybeHandle<Object> HasEnumerableProperty(Isolate* isolate,
                                          Handle<JSReceiver> receiver,
                                          Handle<Object> key) {
  bool success = false;
  Maybe<PropertyAttributes> result = Just(ABSENT);
  LookupIterator it =
      LookupIterator::PropertyOrElement(isolate, receiver, key, &success);
  // The iterator could not be set up for {key}; report "not present".
  if (!success) return isolate->factory()->undefined_value();
  for (; it.IsFound(); it.Next()) {
    switch (it.state()) {
      case LookupIterator::NOT_FOUND:
      case LookupIterator::TRANSITION:
        UNREACHABLE();
      case LookupIterator::JSPROXY: {
        // For proxies we have to invoke the [[GetOwnProperty]] trap.
        result = JSProxy::GetPropertyAttributes(&it);
        if (result.IsNothing()) return MaybeHandle<Object>();
        if (result.FromJust() == ABSENT) {
          // Continue lookup on the proxy's prototype.
          Handle<JSProxy> proxy = it.GetHolder<JSProxy>();
          Handle<Object> prototype;
          ASSIGN_RETURN_ON_EXCEPTION(isolate, prototype,
                                     JSProxy::GetPrototype(proxy), Object);
          if (prototype->IsNull(isolate)) {
            return isolate->factory()->undefined_value();
          }
          // We already have a stack-check in JSProxy::GetPrototype.
          return HasEnumerableProperty(
              isolate, Handle<JSReceiver>::cast(prototype), key);
        } else if (result.FromJust() & DONT_ENUM) {
          // Present on the proxy but not enumerable: filtered out.
          return isolate->factory()->undefined_value();
        } else {
          return it.GetName();
        }
      }
      case LookupIterator::INTERCEPTOR: {
        result = JSObject::GetPropertyAttributesWithInterceptor(&it);
        if (result.IsNothing()) return MaybeHandle<Object>();
        // Any non-ABSENT answer from the interceptor counts as present.
        if (result.FromJust() != ABSENT) return it.GetName();
        continue;
      }
      case LookupIterator::ACCESS_CHECK: {
        // With access granted, keep iterating; otherwise fall back to the
        // failed-access-check attribute query.
        if (it.HasAccess()) continue;
        result = JSObject::GetPropertyAttributesWithFailedAccessCheck(&it);
        if (result.IsNothing()) return MaybeHandle<Object>();
        if (result.FromJust() != ABSENT) return it.GetName();
        return isolate->factory()->undefined_value();
      }
      case LookupIterator::INTEGER_INDEXED_EXOTIC:
        // TypedArray out-of-bounds access.
        return isolate->factory()->undefined_value();
      case LookupIterator::ACCESSOR: {
        if (it.GetHolder<Object>()->IsJSModuleNamespace()) {
          // Module namespace accessors are queried explicitly; the DCHECK
          // asserts that the result is never DONT_ENUM.
          result = JSModuleNamespace::GetPropertyAttributes(&it);
          if (result.IsNothing()) return MaybeHandle<Object>();
          DCHECK_EQ(0, result.FromJust() & DONT_ENUM);
        }
        return it.GetName();
      }
      case LookupIterator::DATA:
        return it.GetName();
    }
  }
  // The lookup ran off the end without finding the property.
  return isolate->factory()->undefined_value();
}

}  // namespace

// Runtime entry point taking one argument, the JSReceiver to enumerate.
// Returns the result of Enumerate() or signals failure.
RUNTIME_FUNCTION(Runtime_ForInEnumerate) {
  HandleScope scope(isolate);
  DCHECK_EQ(1, args.length());
  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, receiver, 0);
  RETURN_RESULT_OR_FAILURE(isolate, Enumerate(isolate, receiver));
}

// Runtime entry point taking two arguments, a JSReceiver and a key.
// Returns a boolean that is true when HasEnumerableProperty() produced
// something other than undefined for the key.
RUNTIME_FUNCTION(Runtime_ForInHasProperty) {
  HandleScope scope(isolate);
  DCHECK_EQ(2, args.length());
  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, receiver, 0);
  CONVERT_ARG_HANDLE_CHECKED(Object, key, 1);
  Handle<Object> result;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, result, HasEnumerableProperty(isolate, receiver, key));
  return isolate->heap()->ToBoolean(!result->IsUndefined(isolate));
}

}  // namespace internal
}  // namespace v8
如果真正需要枚举数组,应该使用传统的for(;;)
或是for ... of
。后者可以直接获取元素而非下标,其时间效率也在可接受的范围(相对于for(;;)
稍慢,但仍然为 $O(n)$ 复杂度)。
各语言性能比较
另外附上各种语言的空间、时间消耗,在大多数题目中,Java 都能做到最快(大概没有计算 Java 虚拟机的启动时间),而 Go 往往可以做到空间最优,C/C++ 紧跟其后。
而与往常 Python 并不会拉开太大差距不同,这里 Python 的时间消耗也很大,大概也是由于生成器本身的耗时。