diff --git a/.cspell.dict/cpython.txt b/.cspell.dict/cpython.txt index c0d007a90c3..f428c42e5f6 100644 --- a/.cspell.dict/cpython.txt +++ b/.cspell.dict/cpython.txt @@ -11,6 +11,7 @@ badsyntax baseinfo basetype binop +bltin boolop BUFMAX BUILDSTDLIB diff --git a/.github/workflows/upgrade-pylib.lock.yml b/.github/workflows/upgrade-pylib.lock.yml index 3ae6c3ddf79..4d8bd37a005 100644 --- a/.github/workflows/upgrade-pylib.lock.yml +++ b/.github/workflows/upgrade-pylib.lock.yml @@ -58,7 +58,7 @@ jobs: comment_repo: "" steps: - name: Setup Scripts - uses: github/gh-aw/actions/setup@fe858c3e14589bf396594a0b106e634d9065823e # v0.43.22 + uses: github/gh-aw/actions/setup@58d1d157fbac0f1204798500faefc4f7461ebe28 # v0.45.0 with: destination: /opt/gh-aw/actions - name: Check workflow file timestamps @@ -99,7 +99,7 @@ jobs: secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }} steps: - name: Setup Scripts - uses: github/gh-aw/actions/setup@fe858c3e14589bf396594a0b106e634d9065823e # v0.43.22 + uses: github/gh-aw/actions/setup@58d1d157fbac0f1204798500faefc4f7461ebe28 # v0.45.0 with: destination: /opt/gh-aw/actions - name: Checkout repository @@ -107,14 +107,14 @@ jobs: with: persist-credentials: false - name: Setup Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3.12' - name: Create gh-aw temp directory run: bash /opt/gh-aw/actions/create_gh_aw_tmp_dir.sh # Cache configuration from frontmatter processed below - name: Cache (cpython-lib-${{ env.PYTHON_VERSION }}) - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: key: cpython-lib-${{ env.PYTHON_VERSION }} path: cpython @@ -804,12 +804,12 @@ jobs: total_count: ${{ steps.missing_tool.outputs.total_count }} steps: - name: Setup Scripts - uses: github/gh-aw/actions/setup@fe858c3e14589bf396594a0b106e634d9065823e # v0.43.22 + uses: github/gh-aw/actions/setup@58d1d157fbac0f1204798500faefc4f7461ebe28 # v0.45.0 with: destination: /opt/gh-aw/actions - name: Download agent output artifact continue-on-error: true - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: name: agent-output path: /tmp/gh-aw/safeoutputs/ @@ -925,18 +925,18 @@ jobs: success: ${{ steps.parse_results.outputs.success }} steps: - name: Setup Scripts - uses: github/gh-aw/actions/setup@fe858c3e14589bf396594a0b106e634d9065823e # v0.43.22 + uses: github/gh-aw/actions/setup@58d1d157fbac0f1204798500faefc4f7461ebe28 # v0.45.0 with: destination: /opt/gh-aw/actions - name: Download agent artifacts continue-on-error: true - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: name: agent-artifacts path: /tmp/gh-aw/threat-detection/ - name: Download agent output artifact continue-on-error: true - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: name: agent-output path: /tmp/gh-aw/threat-detection/ @@ -1037,12 +1037,12 @@ jobs: process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }} steps: - name: Setup Scripts - uses: github/gh-aw/actions/setup@fe858c3e14589bf396594a0b106e634d9065823e # v0.43.22 + uses: github/gh-aw/actions/setup@58d1d157fbac0f1204798500faefc4f7461ebe28 # v0.45.0 with: destination: /opt/gh-aw/actions - name: Download agent output artifact continue-on-error: true - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: name: agent-output path: /tmp/gh-aw/safeoutputs/ @@ -1053,7 +1053,7 @@ jobs: echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/safeoutputs/agent_output.json" >> "$GITHUB_ENV" - name: Download patch artifact continue-on-error: true - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 with: name: agent-artifacts path: /tmp/gh-aw/ diff --git a/Cargo.lock b/Cargo.lock index 906621e099c..5b13350cb56 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -303,7 +303,7 @@ version = "0.71.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "cexpr", "clang-sys", "itertools 0.13.0", @@ -323,7 +323,7 @@ version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "cexpr", "clang-sys", "itertools 0.13.0", @@ -345,9 +345,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.10.0" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" [[package]] name = "blake2" @@ -1761,7 +1761,7 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "libc", ] @@ -2002,7 +2002,7 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "cfg-if", "cfg_aliases", "libc", @@ -2015,7 +2015,7 @@ version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "cfg-if", "cfg_aliases", "libc", @@ -2148,7 +2148,7 @@ version = "0.10.75" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "cfg-if", "foreign-types", "libc", @@ -2529,9 +2529,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.28.0" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcf3ccafdf54c050be48a3a086d372f77ba6615f5057211607cd30e5ac5cec6d" +checksum = "14c738662e2181be11cb82487628404254902bb3225d8e9e99c31f3ef82a405c" dependencies = [ "libc", "once_cell", @@ -2543,18 +2543,18 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.28.0" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "972720a441c91fd9c49f212a1d2d74c6e3803b231ebc8d66c51efbd7ccab11c8" +checksum = "f9ca0864a7dd3c133a7f3f020cbff2e12e88420da854c35540fd20ce2d60e435" dependencies = [ "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.28.0" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5994456d9dab8934d600d3867571b6410f24fbd6002570ad56356733eb54859b" +checksum = "9dfc1956b709823164763a34cc42bbfd26b8730afa77809a3df8b94a3ae3b059" dependencies = [ "libc", "pyo3-build-config", @@ -2562,9 +2562,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.28.0" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11ce9cc8d81b3c4969748807604d92b4eef363c5bb82b1a1bdb34ec6f1093a18" +checksum = "29dc660ad948bae134d579661d08033fbb1918f4529c3bbe3257a68f2009ddf2" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2574,9 +2574,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.28.0" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf4b60036a154d23282679b658e3cc7d88d3b8c9a40b43824785f232d2e1b98" +checksum = "e78cd6c6d718acfcedf26c3d21fe0f053624368b0d44298c55d7138fde9331f7" dependencies = [ "heck", "proc-macro2", @@ -2732,7 +2732,7 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", ] [[package]] @@ -2842,7 +2842,7 @@ version = "0.0.0" source = "git+https://github.com/astral-sh/ruff.git?rev=a2f11d239f91cf8daedb0764ec15fcfe29c5ae6d#a2f11d239f91cf8daedb0764ec15fcfe29c5ae6d" dependencies = [ "aho-corasick", - "bitflags 2.10.0", + "bitflags 2.11.0", "compact_str", "get-size2", "is-macro", @@ -2859,7 +2859,7 @@ name = "ruff_python_parser" version = "0.0.0" source = "git+https://github.com/astral-sh/ruff.git?rev=a2f11d239f91cf8daedb0764ec15fcfe29c5ae6d#a2f11d239f91cf8daedb0764ec15fcfe29c5ae6d" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "bstr", "compact_str", "get-size2", @@ -2923,7 +2923,7 @@ version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "errno", "libc", "linux-raw-sys", @@ -3041,7 +3041,7 @@ name = "rustpython-codegen" version = "0.4.0" dependencies = [ "ahash", - "bitflags 2.10.0", + "bitflags 2.11.0", "indexmap", "insta", "itertools 0.14.0", @@ -3065,7 +3065,7 @@ name = "rustpython-common" version = "0.4.0" dependencies = [ "ascii", - "bitflags 2.10.0", + "bitflags 2.11.0", "cfg-if", "getrandom 0.3.4", "itertools 0.14.0", @@ -3104,7 +3104,7 @@ dependencies = [ name = "rustpython-compiler-core" version = "0.4.0" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "itertools 0.14.0", "lz4_flex", "malachite-bigint", @@ -3194,7 +3194,7 @@ dependencies = [ name = "rustpython-sre_engine" version = "0.4.0" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "criterion", "num_enum", "optional", @@ -3307,7 +3307,7 @@ version = "0.4.0" dependencies = [ "ahash", "ascii", - "bitflags 2.10.0", + "bitflags 2.11.0", "bstr", "caseless", "cfg-if", @@ -3416,7 +3416,7 @@ version = "17.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e902948a25149d50edc1a8e0141aad50f54e22ba83ff988cf8f7c9ef07f50564" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "cfg-if", "clipboard-win", "fd-lock", @@ -3503,7 +3503,7 @@ version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "core-foundation 0.10.1", "core-foundation-sys", "libc", @@ -3774,7 +3774,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "core-foundation 0.9.4", "system-configuration-sys", ] diff --git a/Cargo.toml b/Cargo.toml index 0ac56876a89..6356eef8c0e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -154,7 +154,7 @@ ruff_source_file = { git = "https://github.com/astral-sh/ruff.git", rev = "a2f11 phf = { version = "0.13.1", default-features = false, features = ["macros"]} ahash = "0.8.12" ascii = "1.1" -bitflags = "2.9.4" +bitflags = "2.11.0" bstr = "1" bytes = "1.11.1" cfg-if = "1.0" diff --git a/Lib/csv.py b/Lib/csv.py index cd202659873..0a627ba7a51 100644 --- a/Lib/csv.py +++ b/Lib/csv.py @@ -63,7 +63,6 @@ class excel: written as two quotes """ -import re import types from _csv import Error, writer, reader, register_dialect, \ unregister_dialect, get_dialect, list_dialects, \ @@ -281,6 +280,7 @@ def _guess_quote_and_delimiter(self, data, delimiters): If there is no quotechar the delimiter can't be determined this way. """ + import re matches = [] for restr in (r'(?P[^\w\n"\'])(?P ?)(?P["\']).*?(?P=quote)(?P=delim)', # ,".*?", diff --git a/Lib/test/audiodata/pluck-alaw.aifc b/Lib/test/audiodata/pluck-alaw.aifc deleted file mode 100644 index 3b7fbd2af75..00000000000 Binary files a/Lib/test/audiodata/pluck-alaw.aifc and /dev/null differ diff --git a/Lib/test/audiodata/pluck-pcm16.aiff b/Lib/test/audiodata/pluck-pcm16.aiff deleted file mode 100644 index 6c8c40d1409..00000000000 Binary files a/Lib/test/audiodata/pluck-pcm16.aiff and /dev/null differ diff --git a/Lib/test/audiodata/pluck-pcm16.au b/Lib/test/audiodata/pluck-pcm16.au deleted file mode 100644 index 398f07f0719..00000000000 Binary files a/Lib/test/audiodata/pluck-pcm16.au and /dev/null differ diff --git a/Lib/test/audiodata/pluck-pcm24.aiff b/Lib/test/audiodata/pluck-pcm24.aiff deleted file mode 100644 index 8eba145a44d..00000000000 Binary files a/Lib/test/audiodata/pluck-pcm24.aiff and /dev/null differ diff --git a/Lib/test/audiodata/pluck-pcm24.au b/Lib/test/audiodata/pluck-pcm24.au deleted file mode 100644 index 0bb230418a3..00000000000 Binary files a/Lib/test/audiodata/pluck-pcm24.au and /dev/null differ diff --git a/Lib/test/audiodata/pluck-pcm32.aiff b/Lib/test/audiodata/pluck-pcm32.aiff deleted file mode 100644 index 46ac0373f6a..00000000000 Binary files a/Lib/test/audiodata/pluck-pcm32.aiff and /dev/null differ diff --git a/Lib/test/audiodata/pluck-pcm32.au b/Lib/test/audiodata/pluck-pcm32.au deleted file mode 100644 index 92ee5965e40..00000000000 Binary files a/Lib/test/audiodata/pluck-pcm32.au and /dev/null differ diff --git a/Lib/test/audiodata/pluck-pcm8.aiff b/Lib/test/audiodata/pluck-pcm8.aiff deleted file mode 100644 index 5de4f3b2d87..00000000000 Binary files a/Lib/test/audiodata/pluck-pcm8.aiff and /dev/null differ diff --git a/Lib/test/audiodata/pluck-pcm8.au b/Lib/test/audiodata/pluck-pcm8.au deleted file mode 100644 index b7172c8f234..00000000000 Binary files a/Lib/test/audiodata/pluck-pcm8.au and /dev/null differ diff --git a/Lib/test/audiodata/pluck-ulaw.aifc b/Lib/test/audiodata/pluck-ulaw.aifc deleted file mode 100644 index 3085cf097fb..00000000000 Binary files a/Lib/test/audiodata/pluck-ulaw.aifc and /dev/null differ diff --git a/Lib/test/audiodata/pluck-ulaw.au b/Lib/test/audiodata/pluck-ulaw.au deleted file mode 100644 index 11103535c6b..00000000000 Binary files a/Lib/test/audiodata/pluck-ulaw.au and /dev/null differ diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index bf9b1875573..8af2f0b337c 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001,2002 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # csv package unit tests import copy @@ -10,7 +10,8 @@ import gc import pickle from test import support -from test.support import import_helper, check_disallow_instantiation +from test.support import cpython_only, import_helper, check_disallow_instantiation +from test.support.import_helper import ensure_lazy_imports from itertools import permutations from textwrap import dedent from collections import OrderedDict @@ -86,12 +87,12 @@ def _test_arg_valid(self, ctor, arg): self.assertRaises(ValueError, ctor, arg, quotechar='\x85', lineterminator='\x85') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_reader_arg_valid(self): self._test_arg_valid(csv.reader, []) self.assertRaises(OSError, csv.reader, BadIterable()) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_writer_arg_valid(self): self._test_arg_valid(csv.writer, StringIO()) class BadWriter: @@ -212,7 +213,7 @@ def test_write_bigfield(self): self._write_test([bigstring,bigstring], '%s,%s' % \ (bigstring, bigstring)) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_write_quoting(self): self._write_test(['a',1,'p,q'], 'a,1,"p,q"') self._write_error_test(csv.Error, ['a',1,'p,q'], @@ -230,7 +231,7 @@ def test_write_quoting(self): self._write_test(['a','',None,1], '"a","",,"1"', quoting = csv.QUOTE_NOTNULL) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_write_escape(self): self._write_test(['a',1,'p,q'], 'a,1,"p,q"', escapechar='\\') @@ -262,7 +263,7 @@ def test_write_escape(self): self._write_test(['C\\', '6', '7', 'X"'], 'C\\\\,6,7,"X"""', escapechar='\\', quoting=csv.QUOTE_MINIMAL) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_write_lineterminator(self): for lineterminator in '\r\n', '\n', '\r', '!@#', '\0': with self.subTest(lineterminator=lineterminator): @@ -276,7 +277,7 @@ def test_write_lineterminator(self): f'1,2{lineterminator}' f'"\r","\n"{lineterminator}') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_write_iterable(self): self._write_test(iter(['a', 1, 'p,q']), 'a,1,"p,q"') self._write_test(iter(['a', 1, None]), 'a,1,') @@ -319,7 +320,7 @@ def test_writerows_with_none(self): self.assertEqual(fileobj.read(), 'a\r\n""\r\n') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_write_empty_fields(self): self._write_test((), '') self._write_test([''], '""') @@ -333,7 +334,7 @@ def test_write_empty_fields(self): self._write_test(['', ''], ',') self._write_test([None, None], ',') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_write_empty_fields_space_delimiter(self): self._write_test([''], '""', delimiter=' ', skipinitialspace=False) self._write_test([''], '""', delimiter=' ', skipinitialspace=True) @@ -374,7 +375,7 @@ def _read_test(self, input, expect, **kwargs): result = list(reader) self.assertEqual(result, expect) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_read_oddinputs(self): self._read_test([], []) self._read_test([''], [[]]) @@ -385,7 +386,7 @@ def test_read_oddinputs(self): self.assertRaises(csv.Error, self._read_test, [b'abc'], None) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_read_eol(self): self._read_test(['a,b', 'c,d'], [['a','b'], ['c','d']]) self._read_test(['a,b\n', 'c,d\n'], [['a','b'], ['c','d']]) @@ -400,7 +401,7 @@ def test_read_eol(self): with self.assertRaisesRegex(csv.Error, errmsg): next(csv.reader(['a,b\r\nc,d'])) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_read_eof(self): self._read_test(['a,"'], [['a', '']]) self._read_test(['"a'], [['a']]) @@ -410,7 +411,7 @@ def test_read_eof(self): self.assertRaises(csv.Error, self._read_test, ['^'], [], escapechar='^', strict=True) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_read_nul(self): self._read_test(['\0'], [['\0']]) self._read_test(['a,\0b,c'], [['a', '\0b', 'c']]) @@ -423,7 +424,7 @@ def test_read_delimiter(self): self._read_test(['a;b;c'], [['a', 'b', 'c']], delimiter=';') self._read_test(['a\0b\0c'], [['a', 'b', 'c']], delimiter='\0') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_read_escape(self): self._read_test(['a,\\b,c'], [['a', 'b', 'c']], escapechar='\\') self._read_test(['a,b\\,c'], [['a', 'b,c']], escapechar='\\') @@ -436,7 +437,7 @@ def test_read_escape(self): self._read_test(['a,\\b,c'], [['a', '\\b', 'c']], escapechar=None) self._read_test(['a,\\b,c'], [['a', '\\b', 'c']]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_read_quoting(self): self._read_test(['1,",3,",5'], [['1', ',3,', '5']]) self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']], @@ -473,7 +474,7 @@ def test_read_quoting(self): self._read_test(['1\\.5,\\.5,"\\.5"'], [[1.5, 0.5, ".5"]], quoting=csv.QUOTE_STRINGS, escapechar='\\') - @unittest.skip('TODO: RUSTPYTHON; slice index starts at 1 but ends at 0') + @unittest.skip("TODO: RUSTPYTHON; slice index starts at 1 but ends at 0") def test_read_skipinitialspace(self): self._read_test(['no space, space, spaces,\ttab'], [['no space', 'space', 'spaces', '\ttab']], @@ -488,7 +489,7 @@ def test_read_skipinitialspace(self): [[None, None, None]], skipinitialspace=True, quoting=csv.QUOTE_STRINGS) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_read_space_delimiter(self): self._read_test(['a b', ' a ', ' ', ''], [['a', '', '', 'b'], ['', '', 'a', '', ''], ['', '', ''], []], @@ -528,7 +529,7 @@ def test_read_linenum(self): self.assertRaises(StopIteration, next, r) self.assertEqual(r.line_num, 3) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_roundtrip_quoteed_newlines(self): rows = [ ['\na', 'b\nc', 'd\n'], @@ -547,7 +548,7 @@ def test_roundtrip_quoteed_newlines(self): for i, row in enumerate(csv.reader(fileobj)): self.assertEqual(row, rows[i]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_roundtrip_escaped_unquoted_newlines(self): rows = [ ['\na', 'b\nc', 'd\n'], @@ -662,7 +663,7 @@ def compare_dialect_123(self, expected, *writeargs, **kwwriteargs): fileobj.seek(0) self.assertEqual(fileobj.read(), expected) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_dialect_apply(self): class testA(csv.excel): delimiter = "\t" @@ -784,7 +785,7 @@ def test_quoted_quote(self): '"I see," said the blind man', 'as he picked up his hammer and saw']]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_quoted_nl(self): input = '''\ 1,2,3,"""I see,"" @@ -825,18 +826,18 @@ class EscapedExcel(csv.excel): class TestEscapedExcel(TestCsvBase): dialect = EscapedExcel() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_escape_fieldsep(self): self.writerAssertEqual([['abc,def']], 'abc\\,def\r\n') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_read_escape_fieldsep(self): self.readerAssertEqual('abc\\,def\r\n', [['abc,def']]) class TestDialectUnix(TestCsvBase): dialect = 'unix' - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_simple_writer(self): self.writerAssertEqual([[1, 'abc def', 'abc']], '"1","abc def","abc"\n') @@ -853,7 +854,7 @@ class TestQuotedEscapedExcel(TestCsvBase): def test_write_escape_fieldsep(self): self.writerAssertEqual([['abc,def']], '"abc,def"\r\n') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_read_escape_fieldsep(self): self.readerAssertEqual('"abc\\,def"\r\n', [['abc,def']]) @@ -941,6 +942,14 @@ def test_dict_reader_fieldnames_accepts_list(self): reader = csv.DictReader(f, fieldnames) self.assertEqual(reader.fieldnames, fieldnames) + def test_dict_reader_set_fieldnames(self): + fieldnames = ["a", "b", "c"] + f = StringIO() + reader = csv.DictReader(f) + self.assertIsNone(reader.fieldnames) + reader.fieldnames = fieldnames + self.assertEqual(reader.fieldnames, fieldnames) + def test_dict_writer_fieldnames_rejects_iter(self): fieldnames = ["a", "b", "c"] f = StringIO() @@ -956,6 +965,7 @@ def test_dict_writer_fieldnames_accepts_list(self): def test_dict_reader_fieldnames_is_optional(self): f = StringIO() reader = csv.DictReader(f, fieldnames=None) + self.assertIsNone(reader.fieldnames) def test_read_dict_fields(self): with TemporaryFile("w+", encoding="utf-8") as fileobj: @@ -1050,7 +1060,7 @@ def test_read_multi(self): "s1": 'abc', "s2": 'def'}) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_read_with_blanks(self): reader = csv.DictReader(["1,2,abc,4,5,6\r\n","\r\n", "1,2,abc,4,5,6\r\n"], @@ -1102,7 +1112,7 @@ def test_float_write(self): fileobj.seek(0) self.assertEqual(fileobj.read(), expected) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_char_write(self): import array, string a = array.array('w', string.ascii_letters) @@ -1147,19 +1157,22 @@ class mydialect(csv.Dialect): with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"quotechar" must be a 1-character string') + '"quotechar" must be a unicode character or None, ' + 'not a string of length 0') mydialect.quotechar = "''" with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"quotechar" must be a 1-character string') + '"quotechar" must be a unicode character or None, ' + 'not a string of length 2') mydialect.quotechar = 4 with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"quotechar" must be string or None, not int') + '"quotechar" must be a unicode character or None, ' + 'not int') def test_delimiter(self): class mydialect(csv.Dialect): @@ -1176,31 +1189,32 @@ class mydialect(csv.Dialect): with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be a 1-character string') + '"delimiter" must be a unicode character, ' + 'not a string of length 3') mydialect.delimiter = "" with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be a 1-character string') + '"delimiter" must be a unicode character, not a string of length 0') mydialect.delimiter = b"," with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be string, not bytes') + '"delimiter" must be a unicode character, not bytes') mydialect.delimiter = 4 with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be string, not int') + '"delimiter" must be a unicode character, not int') mydialect.delimiter = None with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"delimiter" must be string, not NoneType') + '"delimiter" must be a unicode character, not NoneType') def test_escapechar(self): class mydialect(csv.Dialect): @@ -1214,20 +1228,32 @@ class mydialect(csv.Dialect): self.assertEqual(d.escapechar, "\\") mydialect.escapechar = "" - with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not a string of length 0') mydialect.escapechar = "**" - with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not a string of length 2') mydialect.escapechar = b"*" - with self.assertRaisesRegex(csv.Error, '"escapechar" must be string or None, not bytes'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not bytes') mydialect.escapechar = 4 - with self.assertRaisesRegex(csv.Error, '"escapechar" must be string or None, not int'): + with self.assertRaises(csv.Error) as cm: mydialect() + self.assertEqual(str(cm.exception), + '"escapechar" must be a unicode character or None, ' + 'not int') def test_lineterminator(self): class mydialect(csv.Dialect): @@ -1248,9 +1274,15 @@ class mydialect(csv.Dialect): with self.assertRaises(csv.Error) as cm: mydialect() self.assertEqual(str(cm.exception), - '"lineterminator" must be a string') + '"lineterminator" must be a string, not int') + + mydialect.lineterminator = None + with self.assertRaises(csv.Error) as cm: + mydialect() + self.assertEqual(str(cm.exception), + '"lineterminator" must be a string, not NoneType') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_invalid_chars(self): def create_invalid(field_name, value, **kwargs): class mydialect(csv.Dialect): @@ -1357,6 +1389,19 @@ class TestSniffer(unittest.TestCase): ghi\0jkl """ + sample15 = "\n\n\n" + sample16 = "abc\ndef\nghi" + + sample17 = ["letter,offset"] + sample17.extend(f"{chr(ord('a') + i)},{i}" for i in range(20)) + sample17.append("v,twenty_one") # 'u' was skipped + sample17 = '\n'.join(sample17) + + sample18 = ["letter,offset"] + sample18.extend(f"{chr(ord('a') + i)},{i}" for i in range(21)) + sample18.append("v,twenty_one") # 'u' was not skipped + sample18 = '\n'.join(sample18) + def test_issue43625(self): sniffer = csv.Sniffer() self.assertTrue(sniffer.has_header(self.sample12)) @@ -1378,6 +1423,11 @@ def test_has_header_regex_special_delimiter(self): self.assertIs(sniffer.has_header(self.sample8), False) self.assertIs(sniffer.has_header(self.header2 + self.sample8), True) + def test_has_header_checks_20_rows(self): + sniffer = csv.Sniffer() + self.assertFalse(sniffer.has_header(self.sample17)) + self.assertTrue(sniffer.has_header(self.sample18)) + def test_guess_quote_and_delimiter(self): sniffer = csv.Sniffer() for header in (";'123;4';", "'123;4';", ";'123;4'", "'123;4'"): @@ -1427,6 +1477,10 @@ def test_delimiters(self): self.assertEqual(dialect.quotechar, "'") dialect = sniffer.sniff(self.sample14) self.assertEqual(dialect.delimiter, '\0') + self.assertRaisesRegex(csv.Error, "Could not determine delimiter", + sniffer.sniff, self.sample15) + self.assertRaisesRegex(csv.Error, "Could not determine delimiter", + sniffer.sniff, self.sample16) def test_doublequote(self): sniffer = csv.Sniffer() @@ -1592,6 +1646,10 @@ class MiscTestCase(unittest.TestCase): def test__all__(self): support.check__all__(self, csv, ('csv', '_csv')) + @cpython_only + def test_lazy_import(self): + ensure_lazy_imports("csv", {"re"}) + def test_subclassable(self): # issue 44089 class Foo(csv.Error): ... diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 930e409fb2e..f4fca1caec7 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -383,7 +383,7 @@ def test_ast_line_numbers_multiline_fstring(self): self.assertEqual(t.body[0].value.values[1].value.col_offset, 11) self.assertEqual(t.body[0].value.values[1].value.end_col_offset, 16) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 4 != 5 def test_ast_line_numbers_with_parentheses(self): expr = """ x = ( @@ -587,7 +587,6 @@ def test_ast_compile_time_concat(self): exec(c) self.assertEqual(x[0], 'foo3') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_compile_time_concat_errors(self): self.assertAllRaise(SyntaxError, 'cannot mix bytes and nonbytes literals', @@ -600,7 +599,6 @@ def test_literal(self): self.assertEqual(f'a', 'a') self.assertEqual(f' ', ' ') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_unterminated_string(self): self.assertAllRaise(SyntaxError, 'unterminated string', [r"""f'{"x'""", @@ -609,7 +607,7 @@ def test_unterminated_string(self): r"""f'{("x}'""", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON @unittest.skipIf(support.is_wasi, "exhausts limited stack on WASI") def test_mismatched_parens(self): self.assertAllRaise(SyntaxError, r"closing parenthesis '\}' " @@ -632,14 +630,24 @@ def test_mismatched_parens(self): r"does not match opening parenthesis '\('", ["f'{a(4}'", ]) - self.assertRaises(SyntaxError, eval, "f'{" + "("*500 + "}'") + self.assertRaises(SyntaxError, eval, "f'{" + "("*20 + "}'") - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: No exception raised @unittest.skipIf(support.is_wasi, "exhausts limited stack on WASI") def test_fstring_nested_too_deeply(self): - self.assertAllRaise(SyntaxError, - "f-string: expressions nested too deeply", - ['f"{1+2:{1+2:{1+1:{1}}}}"']) + def raises_syntax_or_memory_error(txt): + try: + eval(txt) + except SyntaxError: + pass + except MemoryError: + pass + except Exception as ex: + self.fail(f"Should raise SyntaxError or MemoryError, not {type(ex)}") + else: + self.fail("No exception raised") + + raises_syntax_or_memory_error('f"{1+2:{1+2:{1+1:{1}}}}"') def create_nested_fstring(n): if n == 0: @@ -647,9 +655,10 @@ def create_nested_fstring(n): prev = create_nested_fstring(n-1) return f'f"{{{prev}}}"' - self.assertAllRaise(SyntaxError, - "too many nested f-strings", - [create_nested_fstring(160)]) + raises_syntax_or_memory_error(create_nested_fstring(160)) + raises_syntax_or_memory_error("f'{" + "("*100 + "}'") + raises_syntax_or_memory_error("f'{" + "("*1000 + "}'") + raises_syntax_or_memory_error("f'{" + "("*10_000 + "}'") def test_syntax_error_in_nested_fstring(self): # See gh-104016 for more information on this crash @@ -692,7 +701,7 @@ def test_double_braces(self): ["f'{ {{}} }'", # dict in a set ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_compile_time_concat(self): x = 'def' self.assertEqual('abc' f'## {x}ghi', 'abc## defghi') @@ -730,7 +739,7 @@ def test_compile_time_concat(self): ['''f'{3' f"}"''', # can't concat to get a valid f-string ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_comments(self): # These aren't comments, since they're in strings. d = {'#': 'hash'} @@ -807,7 +816,7 @@ def build_fstr(n, extra=''): s = "f'{1}' 'x' 'y'" * 1024 self.assertEqual(eval(s), '1xy' * 1024) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_format_specifier_expressions(self): width = 10 precision = 4 @@ -841,7 +850,6 @@ def test_format_specifier_expressions(self): """f'{"s"!{"r"}}'""", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_custom_format_specifier(self): class CustomFormat: def __format__(self, format_spec): @@ -863,7 +871,7 @@ def __format__(self, spec): x = X() self.assertEqual(f'{x} {x}', '1 2') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_missing_expression(self): self.assertAllRaise(SyntaxError, "f-string: valid expression required before '}'", @@ -926,7 +934,7 @@ def test_missing_expression(self): "\xa0", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_parens_in_expressions(self): self.assertEqual(f'{3,}', '(3,)') @@ -939,13 +947,12 @@ def test_parens_in_expressions(self): ["f'{3)+(4}'", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_newlines_before_syntax_error(self): self.assertAllRaise(SyntaxError, "f-string: expecting a valid expression after '{'", ["f'{.}'", "\nf'{.}'", "\n\nf'{.}'"]) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_backslashes_in_string_part(self): self.assertEqual(f'\t', '\t') self.assertEqual(r'\t', '\\t') @@ -1004,7 +1011,7 @@ def test_backslashes_in_string_part(self): self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam') self.assertEqual(f'\\\N{AMPERSAND}', '\\&') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_misformed_unicode_character_name(self): # These test are needed because unicode names are parsed # differently inside f-strings. @@ -1024,7 +1031,7 @@ def test_misformed_unicode_character_name(self): r"'\N{GREEK CAPITAL LETTER DELTA'", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_backslashes_in_expression_part(self): self.assertEqual(f"{( 1 + @@ -1040,7 +1047,6 @@ def test_backslashes_in_expression_part(self): ["f'{\n}'", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_invalid_backslashes_inside_fstring_context(self): # All of these variations are invalid python syntax, # so they are also invalid in f-strings as well. @@ -1075,7 +1081,7 @@ def test_newlines_in_expressions(self): self.assertEqual(rf'''{3+ 4}''', '7') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: "f-string: expecting a valid expression after '{'" does not match "invalid syntax (, line 1)" def test_lambda(self): x = 5 self.assertEqual(f'{(lambda y:x*y)("8")!r}', "'88888'") @@ -1118,7 +1124,6 @@ def test_roundtrip_raw_quotes(self): self.assertEqual(fr'\'\"\'', '\\\'\\"\\\'') self.assertEqual(fr'\"\'\"\'', '\\"\\\'\\"\\\'') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_fstring_backslash_before_double_bracket(self): deprecated_cases = [ (r"f'\{{\}}'", '\\{\\}'), @@ -1138,7 +1143,6 @@ def test_fstring_backslash_before_double_bracket(self): self.assertEqual(fr'\}}{1+1}', '\\}2') self.assertEqual(fr'{1+1}\}}', '2\\}') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_fstring_backslash_before_double_bracket_warns_once(self): with self.assertWarns(SyntaxWarning) as w: eval(r"f'\{{'") @@ -1288,6 +1292,7 @@ def test_nested_fstrings(self): self.assertEqual(f'{f"{0}"*3}', '000') self.assertEqual(f'{f"{y}"*3}', '555') + @unittest.expectedFailure # TODO: RUSTPYTHON def test_invalid_string_prefixes(self): single_quote_cases = ["fu''", "uf''", @@ -1312,7 +1317,7 @@ def test_invalid_string_prefixes(self): "Bf''", "BF''",] double_quote_cases = [case.replace("'", '"') for case in single_quote_cases] - self.assertAllRaise(SyntaxError, 'invalid syntax', + self.assertAllRaise(SyntaxError, 'prefixes are incompatible', single_quote_cases + double_quote_cases) def test_leading_trailing_spaces(self): @@ -1342,7 +1347,7 @@ def test_equal_equal(self): self.assertEqual(f'{0==1}', 'False') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_conversions(self): self.assertEqual(f'{3.14:10.10}', ' 3.14') self.assertEqual(f'{1.25!s:10.10}', '1.25 ') @@ -1367,7 +1372,6 @@ def test_conversions(self): self.assertAllRaise(SyntaxError, "f-string: expecting '}'", ["f'{3!'", "f'{3!s'", - "f'{3!g'", ]) self.assertAllRaise(SyntaxError, 'f-string: missing conversion character', @@ -1408,14 +1412,13 @@ def test_assignment(self): "f'{x}' = x", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_del(self): self.assertAllRaise(SyntaxError, 'invalid syntax', ["del f''", "del '' f''", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_mismatched_braces(self): self.assertAllRaise(SyntaxError, "f-string: single '}' is not allowed", ["f'{{}'", @@ -1514,7 +1517,6 @@ def test_str_format_differences(self): self.assertEqual('{d[a]}'.format(d=d), 'string') self.assertEqual('{d[0]}'.format(d=d), 'integer') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_errors(self): # see issue 26287 self.assertAllRaise(TypeError, 'unsupported', @@ -1557,7 +1559,6 @@ def test_backslash_char(self): self.assertEqual(eval('f"\\\n"'), '') self.assertEqual(eval('f"\\\r"'), '') - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: '1+2 = # my comment\n 3' != '1+2 = \n 3' def test_debug_conversion(self): x = 'A string' self.assertEqual(f'{x=}', 'x=' + repr(x)) @@ -1705,7 +1706,7 @@ def test_walrus(self): self.assertEqual(f'{(x:=10)}', '10') self.assertEqual(x, 10) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: "f-string: expecting '=', or '!', or ':', or '}'" does not match "invalid syntax (?, line 1)" def test_invalid_syntax_error_message(self): with self.assertRaisesRegex(SyntaxError, "f-string: expecting '=', or '!', or ':', or '}'"): @@ -1731,7 +1732,7 @@ def test_with_an_underscore_and_a_comma_in_format_specifier(self): with self.assertRaisesRegex(ValueError, error_msg): f'{1:_,}' - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: "f-string: expecting a valid expression after '{'" does not match "invalid syntax (?, line 1)" def test_syntax_error_for_starred_expressions(self): with self.assertRaisesRegex(SyntaxError, "can't use starred expression here"): compile("f'{*a}'", "?", "exec") @@ -1740,7 +1741,7 @@ def test_syntax_error_for_starred_expressions(self): "f-string: expecting a valid expression after '{'"): compile("f'{**a}'", "?", "exec") - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; - def test_not_closing_quotes(self): self.assertAllRaise(SyntaxError, "unterminated f-string literal", ['f"', "f'"]) self.assertAllRaise(SyntaxError, "unterminated triple-quoted f-string literal", @@ -1760,7 +1761,7 @@ def test_not_closing_quotes(self): except SyntaxError as e: self.assertEqual(e.text, 'z = f"""') self.assertEqual(e.lineno, 3) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_syntax_error_after_debug(self): self.assertAllRaise(SyntaxError, "f-string: expecting a valid expression after '{'", [ @@ -1788,7 +1789,6 @@ def test_debug_in_file(self): self.assertEqual(stdout.decode('utf-8').strip().replace('\r\n', '\n').replace('\r', '\n'), "3\n=3") - @unittest.expectedFailure # TODO: RUSTPYTHON def test_syntax_warning_infinite_recursion_in_file(self): with temp_cwd(): script = 'script.py' @@ -1878,6 +1878,13 @@ def __format__(self, format): # Test multiple format specs in same raw f-string self.assertEqual(rf"{UnchangedFormat():\xFF} {UnchangedFormat():\n}", '\\xFF \\n') + def test_gh139516(self): + with temp_cwd(): + script = 'script.py' + with open(script, 'wb') as f: + f.write('''def f(a): pass\nf"{f(a=lambda: 'à'\n)}"'''.encode()) + assert_python_ok(script) + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index b72d09865d8..9ebd6dd9cc1 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1232,7 +1232,6 @@ def test_getandroidapilevel(self): self.assertIsInstance(level, int) self.assertGreater(level, 0) - @unittest.expectedFailure # TODO: RUSTPYTHON @force_not_colorized @support.requires_subprocess() def test_sys_tracebacklimit(self): diff --git a/Lib/test/test_unpack.py b/Lib/test/test_unpack.py index 515ec128a08..305da05b7ce 100644 --- a/Lib/test/test_unpack.py +++ b/Lib/test/test_unpack.py @@ -18,6 +18,13 @@ >>> a == 4 and b == 5 and c == 6 True +Unpack dict + + >>> d = {4: 'four', 5: 'five', 6: 'six'} + >>> a, b, c = d + >>> a == 4 and b == 5 and c == 6 + True + Unpack implied tuple >>> a, b, c = 7, 8, 9 @@ -66,14 +73,14 @@ >>> a, b = t Traceback (most recent call last): ... - ValueError: too many values to unpack (expected 2) + ValueError: too many values to unpack (expected 2, got 3) Unpacking tuple of wrong size >>> a, b = l Traceback (most recent call last): ... - ValueError: too many values to unpack (expected 2) + ValueError: too many values to unpack (expected 2, got 3) Unpacking sequence too short @@ -140,14 +147,59 @@ >>> () = [42] Traceback (most recent call last): ... - ValueError: too many values to unpack (expected 0) + ValueError: too many values to unpack (expected 0, got 1) + +Unpacking a larger iterable should raise ValuleError, but it +should not entirely consume the iterable + >>> it = iter(range(100)) + >>> x, y, z = it + Traceback (most recent call last): + ... + ValueError: too many values to unpack (expected 3) + >>> next(it) + 4 + +Unpacking unbalanced dict + + >>> d = {4: 'four', 5: 'five', 6: 'six', 7: 'seven'} + >>> a, b, c = d + Traceback (most recent call last): + ... + ValueError: too many values to unpack (expected 3, got 4) + +Ensure that custom `__len__()` is NOT called when showing the error message + + >>> class LengthTooLong: + ... def __len__(self): + ... return 5 + ... def __getitem__(self, i): + ... return i*2 + ... + >>> x, y, z = LengthTooLong() + Traceback (most recent call last): + ... + ValueError: too many values to unpack (expected 3) + +For evil cases like these as well, no actual count to be shown + + >>> class BadLength: + ... def __len__(self): + ... return 1 + ... def __getitem__(self, i): + ... return i*2 + ... + >>> x, y, z = BadLength() + Traceback (most recent call last): + ... + ValueError: too many values to unpack (expected 3) """ __test__ = {'doctests' : doctests} def load_tests(loader, tests, pattern): - tests.addTest(doctest.DocTestSuite()) + from test.support.rustpython import DocTestChecker # TODO: RUSTPYTHON + tests.addTest(doctest.DocTestSuite(checker=DocTestChecker())) # XXX: RUSTPYTHON return tests diff --git a/Lib/test/test_unpack_ex.py b/Lib/test/test_unpack_ex.py index d84befd9c7b..1496e3be93f 100644 --- a/Lib/test/test_unpack_ex.py +++ b/Lib/test/test_unpack_ex.py @@ -402,15 +402,10 @@ __test__ = {'doctests' : doctests} -EXPECTED_FAILURE = doctest.register_optionflag('EXPECTED_FAILURE') # TODO: RUSTPYTHON -class CustomOutputChecker(doctest.OutputChecker): # TODO: RUSTPYTHON - def check_output(self, want, got, optionflags): # TODO: RUSTPYTHON - if optionflags & EXPECTED_FAILURE: # TODO: RUSTPYTHON - return not super().check_output(want, got, optionflags) # TODO: RUSTPYTHON - return super().check_output(want, got, optionflags) # TODO: RUSTPYTHON def load_tests(loader, tests, pattern): - tests.addTest(doctest.DocTestSuite(checker=CustomOutputChecker())) # TODO: RUSTPYTHON + from test.support.rustpython import DocTestChecker # TODO: RUSTPYTHON + tests.addTest(doctest.DocTestSuite(checker=DocTestChecker())) # XXX: RUSTPYTHON return tests diff --git a/Lib/test/test_warnings/__init__.py b/Lib/test/test_warnings/__init__.py index d466128e8be..53ac0363a3c 100644 --- a/Lib/test/test_warnings/__init__.py +++ b/Lib/test/test_warnings/__init__.py @@ -1478,7 +1478,6 @@ def test_envvar_and_command_line(self): self.assertEqual(stdout, b"['ignore::DeprecationWarning', 'ignore::UnicodeWarning']") - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: b"['error::DeprecationWarning']" != b"['default::DeprecationWarning', 'error::DeprecationWarning']" @force_not_colorized def test_conflicting_envvar_and_command_line(self): rc, stdout, stderr = assert_python_failure("-Werror::DeprecationWarning", "-c", diff --git a/Lib/test/test_winsound.py b/Lib/test/test_winsound.py new file mode 100644 index 00000000000..9724d830ade --- /dev/null +++ b/Lib/test/test_winsound.py @@ -0,0 +1,187 @@ +# Ridiculously simple test of the winsound module for Windows. + +import functools +import os +import time +import unittest + +from test import support +from test.support import import_helper +from test.support import os_helper + + +support.requires('audio') +winsound = import_helper.import_module('winsound') + + +# Unless we actually have an ear in the room, we have no idea whether a sound +# actually plays, and it's incredibly flaky trying to figure out if a sound +# even *should* play. Instead of guessing, just call the function and assume +# it either passed or raised the RuntimeError we expect in case of failure. +def sound_func(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + ret = func(*args, **kwargs) + except RuntimeError as e: + if support.verbose: + print(func.__name__, 'failed:', e) + else: + if support.verbose: + print(func.__name__, 'returned') + return ret + return wrapper + + +safe_Beep = sound_func(winsound.Beep) +safe_MessageBeep = sound_func(winsound.MessageBeep) +safe_PlaySound = sound_func(winsound.PlaySound) + + +class BeepTest(unittest.TestCase): + + def test_errors(self): + self.assertRaises(TypeError, winsound.Beep) + self.assertRaises(ValueError, winsound.Beep, 36, 75) + self.assertRaises(ValueError, winsound.Beep, 32768, 75) + + def test_extremes(self): + safe_Beep(37, 75) + safe_Beep(32767, 75) + + def test_increasingfrequency(self): + for i in range(100, 2000, 100): + safe_Beep(i, 75) + + def test_keyword_args(self): + safe_Beep(duration=75, frequency=2000) + + +class MessageBeepTest(unittest.TestCase): + + def tearDown(self): + time.sleep(0.5) + + def test_default(self): + self.assertRaises(TypeError, winsound.MessageBeep, "bad") + self.assertRaises(TypeError, winsound.MessageBeep, 42, 42) + safe_MessageBeep() + + def test_ok(self): + safe_MessageBeep(winsound.MB_OK) + + def test_asterisk(self): + safe_MessageBeep(winsound.MB_ICONASTERISK) + + def test_exclamation(self): + safe_MessageBeep(winsound.MB_ICONEXCLAMATION) + + def test_hand(self): + safe_MessageBeep(winsound.MB_ICONHAND) + + def test_question(self): + safe_MessageBeep(winsound.MB_ICONQUESTION) + + def test_error(self): + safe_MessageBeep(winsound.MB_ICONERROR) + + def test_information(self): + safe_MessageBeep(winsound.MB_ICONINFORMATION) + + def test_stop(self): + safe_MessageBeep(winsound.MB_ICONSTOP) + + def test_warning(self): + safe_MessageBeep(winsound.MB_ICONWARNING) + + def test_keyword_args(self): + safe_MessageBeep(type=winsound.MB_OK) + + +class PlaySoundTest(unittest.TestCase): + + def test_errors(self): + self.assertRaises(TypeError, winsound.PlaySound) + self.assertRaises(TypeError, winsound.PlaySound, "bad", "bad") + self.assertRaises( + RuntimeError, + winsound.PlaySound, + "none", winsound.SND_ASYNC | winsound.SND_MEMORY + ) + self.assertRaises(TypeError, winsound.PlaySound, b"bad", 0) + self.assertRaises(TypeError, winsound.PlaySound, "bad", + winsound.SND_MEMORY) + self.assertRaises(TypeError, winsound.PlaySound, 1, 0) + # embedded null character + self.assertRaises(ValueError, winsound.PlaySound, 'bad\0', 0) + + def test_keyword_args(self): + safe_PlaySound(flags=winsound.SND_ALIAS, sound="SystemExit") + + def test_snd_memory(self): + with open(support.findfile('pluck-pcm8.wav', + subdir='audiodata'), 'rb') as f: + audio_data = f.read() + safe_PlaySound(audio_data, winsound.SND_MEMORY) + audio_data = bytearray(audio_data) + safe_PlaySound(audio_data, winsound.SND_MEMORY) + + def test_snd_filename(self): + fn = support.findfile('pluck-pcm8.wav', subdir='audiodata') + safe_PlaySound(fn, winsound.SND_FILENAME | winsound.SND_NODEFAULT) + + def test_snd_filepath(self): + fn = support.findfile('pluck-pcm8.wav', subdir='audiodata') + path = os_helper.FakePath(fn) + safe_PlaySound(path, winsound.SND_FILENAME | winsound.SND_NODEFAULT) + + def test_snd_filepath_as_bytes(self): + fn = support.findfile('pluck-pcm8.wav', subdir='audiodata') + self.assertRaises( + TypeError, + winsound.PlaySound, + os_helper.FakePath(os.fsencode(fn)), + winsound.SND_FILENAME | winsound.SND_NODEFAULT + ) + + def test_aliases(self): + aliases = [ + "SystemAsterisk", + "SystemExclamation", + "SystemExit", + "SystemHand", + "SystemQuestion", + ] + for alias in aliases: + with self.subTest(alias=alias): + safe_PlaySound(alias, winsound.SND_ALIAS) + + def test_alias_fallback(self): + safe_PlaySound('!"$%&/(#+*', winsound.SND_ALIAS) + + def test_alias_nofallback(self): + safe_PlaySound('!"$%&/(#+*', winsound.SND_ALIAS | winsound.SND_NODEFAULT) + + def test_stopasync(self): + safe_PlaySound( + 'SystemQuestion', + winsound.SND_ALIAS | winsound.SND_ASYNC | winsound.SND_LOOP + ) + time.sleep(0.5) + safe_PlaySound('SystemQuestion', winsound.SND_ALIAS | winsound.SND_NOSTOP) + # Issue 8367: PlaySound(None, winsound.SND_PURGE) + # does not raise on systems without a sound card. + winsound.PlaySound(None, winsound.SND_PURGE) + + def test_sound_sentry(self): + safe_PlaySound("SystemExit", winsound.SND_ALIAS | winsound.SND_SENTRY) + + def test_sound_sync(self): + safe_PlaySound("SystemExit", winsound.SND_ALIAS | winsound.SND_SYNC) + + def test_sound_system(self): + safe_PlaySound("SystemExit", winsound.SND_ALIAS | winsound.SND_SYSTEM) + + +if __name__ == "__main__": + unittest.main() diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index c132c0f4b09..205facd65bd 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ -6611,33 +6611,45 @@ impl Compiler { /// Compile a boolean operation as an expression. /// This means, that the last value remains on the stack. fn compile_bool_op(&mut self, op: &ast::BoolOp, values: &[ast::Expr]) -> CompileResult<()> { - let after_block = self.new_block(); + self.compile_bool_op_with_target(op, values, None) + } + /// Compile a boolean operation as an expression, with an optional + /// short-circuit target override. When `short_circuit_target` is `Some`, + /// the short-circuit jumps go to that block instead of the default + /// `after_block`, enabling jump threading to avoid redundant `__bool__` calls. + fn compile_bool_op_with_target( + &mut self, + op: &ast::BoolOp, + values: &[ast::Expr], + short_circuit_target: Option, + ) -> CompileResult<()> { + let after_block = self.new_block(); let (last_value, values) = values.split_last().unwrap(); + let jump_target = short_circuit_target.unwrap_or(after_block); for value in values { - self.compile_expression(value)?; - - emit!(self, Instruction::Copy { index: 1_u32 }); - match op { - ast::BoolOp::And => { - emit!( - self, - Instruction::PopJumpIfFalse { - target: after_block, - } - ); - } - ast::BoolOp::Or => { - emit!( - self, - Instruction::PopJumpIfTrue { - target: after_block, - } - ); - } + // Optimization: when a non-last value is a BoolOp with the opposite + // operator, redirect its short-circuit exits to skip the outer's + // redundant __bool__ test (jump threading). + if short_circuit_target.is_none() + && let ast::Expr::BoolOp(ast::ExprBoolOp { + op: inner_op, + values: inner_values, + .. + }) = value + && inner_op != op + { + let pop_block = self.new_block(); + self.compile_bool_op_with_target(inner_op, inner_values, Some(pop_block))?; + self.emit_short_circuit_test(op, after_block); + self.switch_to_block(pop_block); + emit!(self, Instruction::PopTop); + continue; } + self.compile_expression(value)?; + self.emit_short_circuit_test(op, jump_target); emit!(self, Instruction::PopTop); } @@ -6647,6 +6659,20 @@ impl Compiler { Ok(()) } + /// Emit `Copy 1` + conditional jump for short-circuit evaluation. + /// For `And`, emits `PopJumpIfFalse`; for `Or`, emits `PopJumpIfTrue`. + fn emit_short_circuit_test(&mut self, op: &ast::BoolOp, target: BlockIdx) { + emit!(self, Instruction::Copy { index: 1_u32 }); + match op { + ast::BoolOp::And => { + emit!(self, Instruction::PopJumpIfFalse { target }); + } + ast::BoolOp::Or => { + emit!(self, Instruction::PopJumpIfTrue { target }); + } + } + } + fn compile_dict(&mut self, items: &[ast::DictItem]) -> CompileResult<()> { let has_unpacking = items.iter().any(|item| item.key.is_none()); @@ -8450,7 +8476,12 @@ impl Compiler { if let Some(ast::DebugText { leading, trailing }) = &fstring_expr.debug_text { let range = fstring_expr.expression.range(); let source = self.source_file.slice(range); - let text = [leading, source, trailing].concat(); + let text = [ + strip_fstring_debug_comments(leading).as_str(), + source, + strip_fstring_debug_comments(trailing).as_str(), + ] + .concat(); self.emit_load_const(ConstantData::Str { value: text.into() }); element_count += 1; @@ -8786,6 +8817,27 @@ impl ToU32 for usize { } } +/// Strip Python comments from f-string debug text (leading/trailing around `=`). +/// A comment starts with `#` and extends to the end of the line. +/// The newline character itself is preserved. +fn strip_fstring_debug_comments(text: &str) -> String { + let mut result = String::with_capacity(text.len()); + let mut in_comment = false; + for ch in text.chars() { + if in_comment { + if ch == '\n' { + in_comment = false; + result.push(ch); + } + } else if ch == '#' { + in_comment = true; + } else { + result.push(ch); + } + } + result +} + #[cfg(test)] mod ruff_tests { use super::*; @@ -8980,6 +9032,15 @@ if (True and False) or (False and True): )); } + #[test] + fn test_nested_bool_op() { + assert_dis_snapshot!(compile_exec( + "\ +x = Test() and False or False +" + )); + } + #[test] fn test_nested_double_async_with() { assert_dis_snapshot!(compile_exec( diff --git a/crates/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_bool_op.snap b/crates/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_bool_op.snap new file mode 100644 index 00000000000..5b9a2182bdf --- /dev/null +++ b/crates/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_bool_op.snap @@ -0,0 +1,20 @@ +--- +source: crates/codegen/src/compile.rs +assertion_line: 9071 +expression: "compile_exec(\"\\\nx = Test() and False or False\n\")" +--- + 1 0 RESUME (0) + 1 LOAD_NAME (0, Test) + 2 PUSH_NULL + 3 CALL (0) + 4 COPY (1) + 5 POP_JUMP_IF_FALSE (10) + 6 POP_TOP + 7 LOAD_CONST (False) + 8 COPY (1) + 9 POP_JUMP_IF_TRUE (12) + >> 10 POP_TOP + 11 LOAD_CONST (False) + >> 12 STORE_NAME (1, x) + 13 LOAD_CONST (None) + 14 RETURN_VALUE diff --git a/crates/common/src/format.rs b/crates/common/src/format.rs index 2842bd0a3d4..40bc9e53046 100644 --- a/crates/common/src/format.rs +++ b/crates/common/src/format.rs @@ -149,6 +149,7 @@ pub enum FormatType { GeneralFormat(Case), FixedPoint(Case), Percentage, + Unknown(char), } impl From<&FormatType> for char { @@ -170,6 +171,7 @@ impl From<&FormatType> for char { FormatType::FixedPoint(Case::Lower) => 'f', FormatType::FixedPoint(Case::Upper) => 'F', FormatType::Percentage => '%', + FormatType::Unknown(c) => *c, } } } @@ -194,6 +196,7 @@ impl FormatParse for FormatType { Some('g') => (Some(Self::GeneralFormat(Case::Lower)), chars.as_wtf8()), Some('G') => (Some(Self::GeneralFormat(Case::Upper)), chars.as_wtf8()), Some('%') => (Some(Self::Percentage), chars.as_wtf8()), + Some(c) => (Some(Self::Unknown(c)), chars.as_wtf8()), _ => (None, text), } } @@ -429,7 +432,8 @@ impl FormatSpec { | FormatType::FixedPoint(_) | FormatType::GeneralFormat(_) | FormatType::Exponent(_) - | FormatType::Percentage, + | FormatType::Percentage + | FormatType::Number(_), ) => 3, None => 3, _ => panic!("Separators only valid for numbers!"), @@ -475,6 +479,7 @@ impl FormatSpec { let first_letter = (input.to_string().as_bytes()[0] as char).to_uppercase(); Ok(first_letter.collect::() + &input.to_string()[1..]) } + Some(FormatType::Unknown(c)) => Err(FormatSpecError::UnknownFormatCode(*c, "int")), _ => Err(FormatSpecError::InvalidFormatSpecifier), } } @@ -496,7 +501,8 @@ impl FormatSpec { | Some(FormatType::Hex(_)) | Some(FormatType::String) | Some(FormatType::Character) - | Some(FormatType::Number(Case::Upper)) => { + | Some(FormatType::Number(Case::Upper)) + | Some(FormatType::Unknown(_)) => { let ch = char::from(self.format_type.as_ref().unwrap()); Err(FormatSpecError::UnknownFormatCode(ch, "float")) } @@ -609,6 +615,7 @@ impl FormatSpec { Some(float) => return self.format_float(float), _ => Err(FormatSpecError::UnableToConvert), }, + Some(FormatType::Unknown(c)) => Err(FormatSpecError::UnknownFormatCode(c, "int")), None => self.format_int_radix(magnitude, 10), }?; let format_sign = self.sign.unwrap_or(FormatSign::Minus); @@ -707,7 +714,8 @@ impl FormatSpec { | Some(FormatType::String) | Some(FormatType::Character) | Some(FormatType::Number(Case::Upper)) - | Some(FormatType::Percentage) => { + | Some(FormatType::Percentage) + | Some(FormatType::Unknown(_)) => { let ch = char::from(self.format_type.as_ref().unwrap()); Err(FormatSpecError::UnknownFormatCode(ch, "complex")) } diff --git a/crates/stdlib/Cargo.toml b/crates/stdlib/Cargo.toml index e1ac223fb1e..987bca8e88f 100644 --- a/crates/stdlib/Cargo.toml +++ b/crates/stdlib/Cargo.toml @@ -58,7 +58,7 @@ xml = "1.2" # random rand_core = { workspace = true } -mt19937 = "3.1" +mt19937 = "<=3.2" # upgrade it once rand is upgraded # Crypto: digest = "0.10.7" diff --git a/crates/stdlib/src/csv.rs b/crates/stdlib/src/csv.rs index 0eecd07c936..b898dc8c106 100644 --- a/crates/stdlib/src/csv.rs +++ b/crates/stdlib/src/csv.rs @@ -17,7 +17,7 @@ mod _csv { use itertools::{self, Itertools}; use parking_lot::Mutex; use rustpython_common::lock::LazyLock; - use rustpython_vm::match_class; + use rustpython_vm::{match_class, sliceable::SliceableSequenceOp}; use std::collections::HashMap; #[pyattr] @@ -139,34 +139,43 @@ mod _csv { match_class!(match obj.to_owned() { s @ PyStr => { Ok(s.as_str().bytes().exactly_one().map_err(|_| { - let msg = r#""delimiter" must be a 1-character string"#; - vm.new_type_error(msg.to_owned()) + vm.new_type_error(format!( + r#""delimiter" must be a unicode character, not a string of length {}"#, + s.len() + )) })?) } attr => { - let msg = format!("\"delimiter\" must be string, not {}", attr.class()); + let msg = format!( + r#""delimiter" must be a unicode character, not {}"#, + attr.class() + ); Err(vm.new_type_error(msg)) } }) } } + fn parse_quotechar_from_obj(vm: &VirtualMachine, obj: &PyObject) -> PyResult> { match_class!(match obj.get_attr("quotechar", vm)? { s @ PyStr => { Ok(Some(s.as_str().bytes().exactly_one().map_err(|_| { vm.new_exception_msg( super::_csv::error(vm), - r#""quotechar" must be a 1-character string"#.to_owned(), + format!(r#""quotechar" must be a unicode character or None, not a string of length {}"#, s.len()), ) })?)) } _n @ PyNone => { Ok(None) } - _ => { + attr => { Err(vm.new_exception_msg( super::_csv::error(vm), - r#""quotechar" must be string or None, not int"#.to_owned(), + format!( + r#""quotechar" must be a unicode character or None, not {}"#, + attr.class() + ), )) } }) @@ -177,7 +186,7 @@ mod _csv { Ok(Some(s.as_str().bytes().exactly_one().map_err(|_| { vm.new_exception_msg( super::_csv::error(vm), - r#""escapechar" must be a 1-character string"#.to_owned(), + format!(r#""escapechar" must be a unicode character or None, not a string of length {}"#, s.len()), ) })?)) } @@ -186,7 +195,7 @@ mod _csv { } attr => { let msg = format!( - "\"escapechar\" must be string or None, not {}", + r#""escapechar" must be a unicode character or None, not {}"#, attr.class() ); Err(vm.new_type_error(msg.to_owned())) @@ -210,9 +219,11 @@ mod _csv { )); }) } - _ => { - let msg = "\"lineterminator\" must be a string".to_string(); - Err(vm.new_type_error(msg.to_owned())) + attr => { + Err(vm.new_type_error(format!( + r#""lineterminator" must be a string, not {}"#, + attr.class() + ))) } }) } @@ -225,7 +236,7 @@ mod _csv { })?) } attr => { - let msg = format!("\"quoting\" must be string or None, not {}", attr.class()); + let msg = format!(r#""quoting" must be string or None, not {}"#, attr.class()); Err(vm.new_type_error(msg.to_owned())) } }) diff --git a/crates/stdlib/src/faulthandler.rs b/crates/stdlib/src/faulthandler.rs index 265bcf9ca6d..f618f8f6731 100644 --- a/crates/stdlib/src/faulthandler.rs +++ b/crates/stdlib/src/faulthandler.rs @@ -96,9 +96,8 @@ mod decl { all_threads: AtomicBool::new(true), }; - /// Arc>> - shared frame slot for a thread #[cfg(feature = "threading")] - type ThreadFrameSlot = Arc>>; + type ThreadFrameSlot = Arc; // Watchdog thread state for dump_traceback_later struct WatchdogState { @@ -326,7 +325,7 @@ mod decl { /// Write a frame's info to an fd using signal-safe I/O. #[cfg(any(unix, windows))] - fn dump_frame_from_ref(fd: i32, frame: &crate::vm::PyRef) { + fn dump_frame_from_ref(fd: i32, frame: &crate::vm::Py) { let funcname = frame.code.obj_name.as_str(); let filename = frame.code.source_path().as_str(); let lineno = if frame.lasti() == 0 { @@ -345,20 +344,23 @@ mod decl { } /// Dump traceback for a thread given its frame stack (for cross-thread dumping). + /// # Safety + /// Each `FramePtr` must point to a live frame (caller holds the Mutex). #[cfg(all(any(unix, windows), feature = "threading"))] fn dump_traceback_thread_frames( fd: i32, thread_id: u64, is_current: bool, - frames: &[crate::vm::frame::FrameRef], + frames: &[rustpython_vm::vm::FramePtr], ) { write_thread_id(fd, thread_id, is_current); if frames.is_empty() { puts(fd, " \n"); } else { - for frame in frames.iter().rev() { - dump_frame_from_ref(fd, frame); + for fp in frames.iter().rev() { + // SAFETY: caller holds the Mutex, so the owning thread can't pop. + dump_frame_from_ref(fd, unsafe { fp.as_ref() }); } } } @@ -382,8 +384,9 @@ mod decl { } else { puts(fd, "Stack (most recent call first):\n"); let frames = vm.frames.borrow(); - for frame in frames.iter().rev() { - dump_frame_from_ref(fd, frame); + for fp in frames.iter().rev() { + // SAFETY: the frame is alive while it's in the Vec + dump_frame_from_ref(fd, unsafe { fp.as_ref() }); } } } @@ -410,7 +413,7 @@ mod decl { if tid == current_tid { continue; } - let frames_guard = slot.lock(); + let frames_guard = slot.frames.lock(); dump_traceback_thread_frames(fd, tid, false, &frames_guard); puts(fd, "\n"); } @@ -421,8 +424,8 @@ mod decl { if frames.is_empty() { puts(fd, " \n"); } else { - for frame in frames.iter().rev() { - dump_frame_from_ref(fd, frame); + for fp in frames.iter().rev() { + dump_frame_from_ref(fd, unsafe { fp.as_ref() }); } } } @@ -431,8 +434,8 @@ mod decl { { write_thread_id(fd, current_thread_id(), true); let frames = vm.frames.borrow(); - for frame in frames.iter().rev() { - dump_frame_from_ref(fd, frame); + for fp in frames.iter().rev() { + dump_frame_from_ref(fd, unsafe { fp.as_ref() }); } } } @@ -870,7 +873,7 @@ mod decl { #[cfg(feature = "threading")] { for (tid, slot) in &thread_frame_slots { - let frames = slot.lock(); + let frames = slot.frames.lock(); dump_traceback_thread_frames(fd, *tid, false, &frames); } } diff --git a/crates/vm/Cargo.toml b/crates/vm/Cargo.toml index b89cda4fdb5..78b1608673e 100644 --- a/crates/vm/Cargo.toml +++ b/crates/vm/Cargo.toml @@ -119,6 +119,7 @@ workspace = true features = [ "Win32_Foundation", "Win32_Globalization", + "Win32_Media_Audio", "Win32_Networking_WinSock", "Win32_Security", "Win32_Security_Authorization", diff --git a/crates/vm/src/builtins/frame.rs b/crates/vm/src/builtins/frame.rs index 5d7510d8ff8..ed2e1e672fd 100644 --- a/crates/vm/src/builtins/frame.rs +++ b/crates/vm/src/builtins/frame.rs @@ -4,7 +4,7 @@ use super::{PyCode, PyDictRef, PyIntRef, PyStrRef}; use crate::{ - AsObject, Context, Py, PyObjectRef, PyRef, PyResult, VirtualMachine, + Context, Py, PyObjectRef, PyRef, PyResult, VirtualMachine, class::PyClassImpl, frame::{Frame, FrameOwner, FrameRef}, function::PySetterValue, @@ -195,16 +195,43 @@ impl Py { #[pygetset] pub fn f_back(&self, vm: &VirtualMachine) -> Option> { - // TODO: actually store f_back inside Frame struct + let previous = self.previous_frame(); + if previous.is_null() { + return None; + } - // get the frame in the frame stack that appears before this one. - // won't work if this frame isn't in the frame stack, hence the todo above - vm.frames + if let Some(frame) = vm + .frames .borrow() .iter() - .rev() - .skip_while(|p| !p.is(self.as_object())) - .nth(1) - .cloned() + .find(|fp| { + // SAFETY: the caller keeps the FrameRef alive while it's in the Vec + let py: &crate::Py = unsafe { fp.as_ref() }; + let ptr: *const Frame = &**py; + core::ptr::eq(ptr, previous) + }) + .map(|fp| unsafe { fp.as_ref() }.to_owned()) + { + return Some(frame); + } + + #[cfg(feature = "threading")] + { + let registry = vm.state.thread_frames.lock(); + for slot in registry.values() { + let frames = slot.frames.lock(); + // SAFETY: the owning thread can't pop while we hold the Mutex, + // so FramePtr is valid for the duration of the lock. + if let Some(frame) = frames.iter().find_map(|fp| { + let f = unsafe { fp.as_ref() }; + let ptr: *const Frame = &**f; + core::ptr::eq(ptr, previous).then(|| f.to_owned()) + }) { + return Some(frame); + } + } + } + + None } } diff --git a/crates/vm/src/coroutine.rs b/crates/vm/src/coroutine.rs index a066c9944fe..ac44e33f799 100644 --- a/crates/vm/src/coroutine.rs +++ b/crates/vm/src/coroutine.rs @@ -1,11 +1,11 @@ use crate::{ AsObject, Py, PyObject, PyObjectRef, PyResult, TryFromObject, VirtualMachine, - builtins::{PyBaseExceptionRef, PyStrRef}, + builtins::PyStrRef, common::lock::PyMutex, exceptions::types::PyBaseException, - frame::{ExecutionResult, FrameOwner, FrameRef}, + frame::{ExecutionResult, Frame, FrameOwner, FrameRef}, function::OptionalArg, - object::{Traverse, TraverseFn}, + object::{PyAtomicRef, Traverse, TraverseFn}, protocol::PyIterReturn, }; use crossbeam_utils::atomic::AtomicCell; @@ -36,7 +36,7 @@ pub struct Coro { // _weakreflist name: PyMutex, qualname: PyMutex, - exception: PyMutex>, // exc_state + exception: PyAtomicRef>, // exc_state } unsafe impl Traverse for Coro { @@ -44,7 +44,9 @@ unsafe impl Traverse for Coro { self.frame.traverse(tracer_fn); self.name.traverse(tracer_fn); self.qualname.traverse(tracer_fn); - self.exception.traverse(tracer_fn); + if let Some(exc) = self.exception.deref() { + exc.traverse(tracer_fn); + } } } @@ -65,7 +67,7 @@ impl Coro { frame, closed: AtomicCell::new(false), running: AtomicCell::new(false), - exception: PyMutex::default(), + exception: PyAtomicRef::from(None), name: PyMutex::new(name), qualname: PyMutex::new(qualname), } @@ -92,33 +94,20 @@ impl Coro { func: F, ) -> PyResult where - F: FnOnce(FrameRef) -> PyResult, + F: FnOnce(&Py) -> PyResult, { if self.running.compare_exchange(false, true).is_err() { return Err(vm.new_value_error(format!("{} already executing", gen_name(jen, vm)))); } - // swap exception state - // Get generator's saved exception state from last yield - let gen_exc = self.exception.lock().take(); - - // Use a slot to capture generator's exception state before with_frame pops - let exception_slot = &self.exception; + // SAFETY: running.compare_exchange guarantees exclusive access + let gen_exc = unsafe { self.exception.swap(None) }; + let exception_ptr = &self.exception as *const PyAtomicRef>; - // Run the generator frame - // with_frame does push_exception(None) which creates a new exception context - // The caller's exception remains in the chain via prev, so topmost_exception() - // will find it if generator's exception is None - let result = vm.with_frame(self.frame.clone(), |f| { - // with_frame pushed None, creating: { exc: None, prev: caller's exc_info } - // Pop None and push generator's exception instead - // This maintains the chain: { exc: gen_exc, prev: caller's exc_info } - vm.pop_exception(); - vm.push_exception(gen_exc); + let result = vm.resume_gen_frame(&self.frame, gen_exc, |f| { let result = func(f); - // Save generator's exception state BEFORE with_frame pops - // This is the generator's current exception context - *exception_slot.lock() = vm.current_exception(); + // SAFETY: exclusive access guaranteed by running flag + let _old = unsafe { (*exception_ptr).swap(vm.current_exception()) }; result }); diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index f24b25c610a..62df1b298e6 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -463,7 +463,7 @@ impl ExecutingFrame<'_> { // Execute until return or exception: let instructions = &self.code.instructions; let mut arg_state = bytecode::OpArgState::default(); - let mut prev_line: usize = 0; + let mut prev_line: u32 = 0; loop { let idx = self.lasti() as usize; // Fire 'line' trace event when line number changes. @@ -472,9 +472,9 @@ impl ExecutingFrame<'_> { if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) && let Some((loc, _)) = self.code.locations.get(idx) - && loc.line.get() != prev_line + && loc.line.get() as u32 != prev_line { - prev_line = loc.line.get(); + prev_line = loc.line.get() as u32; vm.trace_event(crate::protocol::TraceEvent::Line, None)?; } self.update_lasti(|i| *i += 1); @@ -543,13 +543,16 @@ impl ExecutingFrame<'_> { // Check if this is a RERAISE instruction // Both AnyInstruction::Raise { kind: Reraise/ReraiseFromStack } and // AnyInstruction::Reraise are reraise operations that should not add - // new traceback entries + // new traceback entries. + // EndAsyncFor and CleanupThrow also re-raise non-matching exceptions. let is_reraise = match op { Instruction::RaiseVarargs { kind } => matches!( kind.get(arg), bytecode::RaiseKind::BareRaise | bytecode::RaiseKind::ReraiseFromStack ), - Instruction::Reraise { .. } => true, + Instruction::Reraise { .. } + | Instruction::EndAsyncFor + | Instruction::CleanupThrow => true, _ => false, }; @@ -653,6 +656,19 @@ impl ExecutingFrame<'_> { Ok(()) }; if let Err(err) = close_result { + let idx = self.lasti().saturating_sub(1) as usize; + if idx < self.code.locations.len() { + let (loc, _end_loc) = self.code.locations[idx]; + let next = err.__traceback__(); + let new_traceback = PyTraceback::new( + next, + self.object.to_owned(), + idx as u32 * 2, + loc.line, + ); + err.set_traceback_typed(Some(new_traceback.into_ref(&vm.ctx))); + } + self.push_value(vm.ctx.none()); vm.contextualize_exception(&err); return match self.unwind_blocks(vm, UnwindReason::Raising { exception: err }) { @@ -678,6 +694,23 @@ impl ExecutingFrame<'_> { Either::B(meth) => meth.call((exc_type, exc_val, exc_tb), vm), }; return ret.map(ExecutionResult::Yield).or_else(|err| { + // Add traceback entry for the yield-from/await point. + // gen_send_ex2 resumes the frame with a pending exception, + // which goes through error: → PyTraceBack_Here. We add the + // entry here before calling unwind_blocks. + let idx = self.lasti().saturating_sub(1) as usize; + if idx < self.code.locations.len() { + let (loc, _end_loc) = self.code.locations[idx]; + let next = err.__traceback__(); + let new_traceback = PyTraceback::new( + next, + self.object.to_owned(), + idx as u32 * 2, + loc.line, + ); + err.set_traceback_typed(Some(new_traceback.into_ref(&vm.ctx))); + } + self.push_value(vm.ctx.none()); vm.contextualize_exception(&err); match self.unwind_blocks(vm, UnwindReason::Raising { exception: err }) { @@ -3010,13 +3043,54 @@ impl ExecutingFrame<'_> { Ok(None) } + /// _PyEval_UnpackIterableStackRef fn unpack_sequence(&mut self, size: u32, vm: &VirtualMachine) -> FrameResult { let value = self.pop_value(); + let size = size as usize; + + // Fast path for exact tuple/list types (not subclasses) — check + // length directly without creating an iterator, matching + // UNPACK_SEQUENCE_TUPLE / UNPACK_SEQUENCE_LIST specializations. + let cls = value.class(); + let fast_elements: Option> = if cls.is(vm.ctx.types.tuple_type) { + Some(value.downcast_ref::().unwrap().as_slice().to_vec()) + } else if cls.is(vm.ctx.types.list_type) { + Some( + value + .downcast_ref::() + .unwrap() + .borrow_vec() + .to_vec(), + ) + } else { + None + }; + if let Some(elements) = fast_elements { + return match elements.len().cmp(&size) { + core::cmp::Ordering::Equal => { + self.state + .stack + .extend(elements.into_iter().rev().map(Some)); + Ok(None) + } + core::cmp::Ordering::Greater => Err(vm.new_value_error(format!( + "too many values to unpack (expected {size}, got {})", + elements.len() + ))), + core::cmp::Ordering::Less => Err(vm.new_value_error(format!( + "not enough values to unpack (expected {size}, got {})", + elements.len() + ))), + }; + } + + // General path — iterate up to `size + 1` elements to avoid + // consuming the entire iterator (fixes hang on infinite sequences). let not_iterable = value.class().slots.iter.load().is_none() && value .get_class_attr(vm.ctx.intern_str("__getitem__")) .is_none(); - let elements: Vec<_> = value.try_to_value(vm).map_err(|e| { + let iter = PyIter::try_from_object(vm, value.clone()).map_err(|e| { if not_iterable && e.class().is(vm.ctx.exceptions.type_error) { vm.new_type_error(format!( "cannot unpack non-iterable {} object", @@ -3026,24 +3100,48 @@ impl ExecutingFrame<'_> { e } })?; - let msg = match elements.len().cmp(&(size as usize)) { - core::cmp::Ordering::Equal => { - // Wrap each element in Some() for Option stack + + let mut elements = Vec::with_capacity(size); + for _ in 0..size { + match iter.next(vm)? { + PyIterReturn::Return(item) => elements.push(item), + PyIterReturn::StopIteration(_) => { + return Err(vm.new_value_error(format!( + "not enough values to unpack (expected {size}, got {})", + elements.len() + ))); + } + } + } + + // Check that the iterator is exhausted. + match iter.next(vm)? { + PyIterReturn::Return(_) => { + // For exact dict types, show "got N" using the container's + // size (PyDict_Size). Exact tuple/list are handled by the + // fast path above and never reach here. + let msg = if value.class().is(vm.ctx.types.dict_type) { + if let Ok(got) = value.length(vm) { + if got > size { + format!("too many values to unpack (expected {size}, got {got})") + } else { + format!("too many values to unpack (expected {size})") + } + } else { + format!("too many values to unpack (expected {size})") + } + } else { + format!("too many values to unpack (expected {size})") + }; + Err(vm.new_value_error(msg)) + } + PyIterReturn::StopIteration(_) => { self.state .stack .extend(elements.into_iter().rev().map(Some)); - return Ok(None); - } - core::cmp::Ordering::Greater => { - format!("too many values to unpack (expected {size})") + Ok(None) } - core::cmp::Ordering::Less => format!( - "not enough values to unpack (expected {}, got {})", - size, - elements.len() - ), - }; - Err(vm.new_value_error(msg)) + } } fn convert_value( diff --git a/crates/vm/src/protocol/callable.rs b/crates/vm/src/protocol/callable.rs index 9308ec8ffe2..9a621dee4f8 100644 --- a/crates/vm/src/protocol/callable.rs +++ b/crates/vm/src/protocol/callable.rs @@ -2,7 +2,7 @@ use crate::{ builtins::{PyBoundMethod, PyFunction}, function::{FuncArgs, IntoFuncArgs}, types::GenericMethod, - {AsObject, PyObject, PyObjectRef, PyResult, VirtualMachine}, + {PyObject, PyObjectRef, PyResult, VirtualMachine}, }; impl PyObject { @@ -111,12 +111,11 @@ impl VirtualMachine { return Ok(()); } - let frame_ref = self.current_frame(); - if frame_ref.is_none() { + let Some(frame_ref) = self.current_frame() else { return Ok(()); - } + }; - let frame = frame_ref.unwrap().as_object().to_owned(); + let frame: PyObjectRef = frame_ref.into(); let event = self.ctx.new_str(event.to_string()).into(); let args = vec![frame, event, arg.unwrap_or_else(|| self.ctx.none())]; diff --git a/crates/vm/src/stdlib/builtins.rs b/crates/vm/src/stdlib/builtins.rs index 7b24d72d9b5..1b54a26e732 100644 --- a/crates/vm/src/stdlib/builtins.rs +++ b/crates/vm/src/stdlib/builtins.rs @@ -384,7 +384,7 @@ mod builtins { ) } None => ( - vm.current_globals().clone(), + vm.current_globals(), if let Some(locals) = self.locals { locals } else { @@ -503,7 +503,7 @@ mod builtins { #[pyfunction] fn globals(vm: &VirtualMachine) -> PyDictRef { - vm.current_globals().clone() + vm.current_globals() } #[pyfunction] diff --git a/crates/vm/src/stdlib/mod.rs b/crates/vm/src/stdlib/mod.rs index 5bec43f7222..e3bf42a4f77 100644 --- a/crates/vm/src/stdlib/mod.rs +++ b/crates/vm/src/stdlib/mod.rs @@ -67,6 +67,8 @@ pub mod sys; mod winapi; #[cfg(all(feature = "host_env", windows))] mod winreg; +#[cfg(all(feature = "host_env", windows))] +mod winsound; use crate::{Context, builtins::PyModuleDef}; @@ -132,6 +134,8 @@ pub fn builtin_module_defs(ctx: &Context) -> Vec<&'static PyModuleDef> { #[cfg(all(feature = "host_env", windows))] winreg::module_def(ctx), #[cfg(all(feature = "host_env", windows))] + winsound::module_def(ctx), + #[cfg(all(feature = "host_env", windows))] _wmi::module_def(ctx), ] } diff --git a/crates/vm/src/stdlib/nt.rs b/crates/vm/src/stdlib/nt.rs index 0013aa0f970..5b2cf3b92f5 100644 --- a/crates/vm/src/stdlib/nt.rs +++ b/crates/vm/src/stdlib/nt.rs @@ -6,7 +6,7 @@ pub use module::raw_set_handle_inheritable; #[pymodule(name = "nt", with(super::os::_os))] pub(crate) mod module { use crate::{ - Py, PyObjectRef, PyResult, TryFromObject, VirtualMachine, + Py, PyResult, TryFromObject, VirtualMachine, builtins::{PyBaseExceptionRef, PyDictRef, PyListRef, PyStrRef, PyTupleRef}, common::{crt_fd, suppress_iph, windows::ToWideString}, convert::ToPyException, @@ -1212,21 +1212,6 @@ pub(crate) mod module { } } - fn envobj_to_dict(env: ArgMapping, vm: &VirtualMachine) -> PyResult { - let obj = env.obj(); - if let Some(dict) = obj.downcast_ref_if_exact::(vm) { - return Ok(dict.to_owned()); - } - let keys = vm.call_method(obj, "keys", ())?; - let dict = vm.ctx.new_dict(); - for key in keys.get_iter(vm)?.into_iter::(vm)? { - let key = key?; - let val = obj.get_item(&*key, vm)?; - dict.set_item(&*key, val, vm)?; - } - Ok(dict) - } - #[cfg(target_env = "msvc")] #[pyfunction] fn execve( @@ -1261,7 +1246,7 @@ pub(crate) mod module { .chain(once(core::ptr::null())) .collect(); - let env = envobj_to_dict(env, vm)?; + let env = crate::stdlib::os::envobj_to_dict(env, vm)?; // Build environment strings as "KEY=VALUE\0" wide strings let mut env_strings: Vec = Vec::new(); for (key, value) in env.into_iter() { diff --git a/crates/vm/src/stdlib/os.rs b/crates/vm/src/stdlib/os.rs index 8b7d3915278..2fb71d9ec01 100644 --- a/crates/vm/src/stdlib/os.rs +++ b/crates/vm/src/stdlib/os.rs @@ -2,10 +2,10 @@ use crate::{ AsObject, Py, PyObjectRef, PyPayload, PyResult, TryFromObject, VirtualMachine, - builtins::{PyModule, PySet}, + builtins::{PyDictRef, PyModule, PySet}, common::crt_fd, convert::{IntoPyException, ToPyException, ToPyObject}, - function::{ArgumentError, FromArgs, FuncArgs}, + function::{ArgMapping, ArgumentError, FromArgs, FuncArgs}, }; use std::{fs, io, path::Path}; @@ -2038,6 +2038,32 @@ pub fn module_exec(vm: &VirtualMachine, module: &Py) -> PyResult<()> { Ok(()) } +/// Convert a mapping (e.g. os._Environ) to a plain dict for use by execve/posix_spawn. +/// +/// For `os._Environ`, accesses the internal `_data` dict directly at the Rust level. +/// This avoids Python-level method calls that can deadlock after fork() when +/// parking_lot locks are held by threads that no longer exist. +pub(crate) fn envobj_to_dict(env: ArgMapping, vm: &VirtualMachine) -> PyResult { + let obj = env.obj(); + if let Some(dict) = obj.downcast_ref_if_exact::(vm) { + return Ok(dict.to_owned()); + } + if let Some(inst_dict) = obj.dict() + && let Ok(Some(data)) = inst_dict.get_item_opt("_data", vm) + && let Some(dict) = data.downcast_ref_if_exact::(vm) + { + return Ok(dict.to_owned()); + } + let keys = vm.call_method(obj, "keys", ())?; + let dict = vm.ctx.new_dict(); + for key in keys.get_iter(vm)?.into_iter::(vm)? { + let key = key?; + let val = obj.get_item(&*key, vm)?; + dict.set_item(&*key, val, vm)?; + } + Ok(dict) +} + #[cfg(not(windows))] use super::posix as platform; diff --git a/crates/vm/src/stdlib/posix.rs b/crates/vm/src/stdlib/posix.rs index 1c4b502f9e2..ce70412df76 100644 --- a/crates/vm/src/stdlib/posix.rs +++ b/crates/vm/src/stdlib/posix.rs @@ -716,7 +716,15 @@ pub mod module { vm.signal_handlers .get_or_init(crate::signal::new_signal_handlers); - let after_forkers_child: Vec = vm.state.after_forkers_child.lock().clone(); + let after_forkers_child = match vm.state.after_forkers_child.try_lock() { + Some(guard) => guard.clone(), + None => { + // SAFETY: After fork in child process, only the current thread + // exists. The lock holder no longer exists. + unsafe { vm.state.after_forkers_child.force_unlock() }; + vm.state.after_forkers_child.lock().clone() + } + }; run_at_forkers(after_forkers_child, false, vm); } @@ -1073,21 +1081,6 @@ pub mod module { .map_err(|err| err.into_pyexception(vm)) } - fn envobj_to_dict(env: ArgMapping, vm: &VirtualMachine) -> PyResult { - let obj = env.obj(); - if let Some(dict) = obj.downcast_ref_if_exact::(vm) { - return Ok(dict.to_owned()); - } - let keys = vm.call_method(obj, "keys", ())?; - let dict = vm.ctx.new_dict(); - for key in keys.get_iter(vm)?.into_iter::(vm)? { - let key = key?; - let val = obj.get_item(&*key, vm)?; - dict.set_item(&*key, val, vm)?; - } - Ok(dict) - } - #[pyfunction] fn execve( path: OsPath, @@ -1110,7 +1103,7 @@ pub mod module { return Err(vm.new_value_error("execve() arg 2 first element cannot be empty")); } - let env = envobj_to_dict(env, vm)?; + let env = crate::stdlib::os::envobj_to_dict(env, vm)?; let env = env .into_iter() .map(|(k, v)| -> PyResult<_> { diff --git a/crates/vm/src/stdlib/sys.rs b/crates/vm/src/stdlib/sys.rs index 4c672af110b..22b720a1cd2 100644 --- a/crates/vm/src/stdlib/sys.rs +++ b/crates/vm/src/stdlib/sys.rs @@ -41,7 +41,7 @@ mod sys { hash::{PyHash, PyUHash}, }, convert::ToPyObject, - frame::FrameRef, + frame::{Frame, FrameRef}, function::{FuncArgs, KwArgs, OptionalArg, PosArgs}, stdlib::{builtins, warnings::warn}, types::PyStructSequence, @@ -825,11 +825,13 @@ mod sys { let stderr = super::get_stderr(vm)?; match vm.normalize_exception(exc_type.clone(), exc_val.clone(), exc_tb) { Ok(exc) => { - // Try Python traceback module first for richer output - // (enables features like keyword typo suggestions in SyntaxError) + // PyErr_Display: try traceback._print_exception_bltin first if let Ok(tb_mod) = vm.import("traceback", 0) - && let Ok(print_exc) = tb_mod.get_attr("print_exception", vm) - && print_exc.call((exc.as_object().to_owned(),), vm).is_ok() + && let Ok(print_exc_builtin) = + tb_mod.get_attr("_print_exception_bltin", vm) + && print_exc_builtin + .call((exc.as_object().to_owned(),), vm) + .is_ok() { return Ok(()); } @@ -971,12 +973,14 @@ mod sys { #[pyfunction] fn _getframe(offset: OptionalArg, vm: &VirtualMachine) -> PyResult { let offset = offset.into_option().unwrap_or(0); - if offset > vm.frames.borrow().len() - 1 { + let frames = vm.frames.borrow(); + if offset >= frames.len() { return Err(vm.new_value_error("call stack is not deep enough")); } - let idx = vm.frames.borrow().len() - offset - 1; - let frame = &vm.frames.borrow()[idx]; - Ok(frame.clone()) + let idx = frames.len() - offset - 1; + // SAFETY: the FrameRef is alive on the call stack while it's in the Vec + let py: &crate::Py = unsafe { frames[idx].as_ref() }; + Ok(py.to_owned()) } #[pyfunction] @@ -984,15 +988,19 @@ mod sys { let depth = depth.into_option().unwrap_or(0); // Get the frame at the specified depth - if depth > vm.frames.borrow().len() - 1 { - return Ok(vm.ctx.none()); - } - - let idx = vm.frames.borrow().len() - depth - 1; - let frame = &vm.frames.borrow()[idx]; + let func_obj = { + let frames = vm.frames.borrow(); + if depth >= frames.len() { + return Ok(vm.ctx.none()); + } + let idx = frames.len() - depth - 1; + // SAFETY: the FrameRef is alive on the call stack while it's in the Vec + let frame: &crate::Py = unsafe { frames[idx].as_ref() }; + frame.func_obj.clone() + }; // If the frame has a function object, return its __module__ attribute - if let Some(func_obj) = &frame.func_obj { + if let Some(func_obj) = func_obj { match func_obj.get_attr(identifier!(vm, __module__), vm) { Ok(module) => Ok(module), Err(_) => { diff --git a/crates/vm/src/stdlib/thread.rs b/crates/vm/src/stdlib/thread.rs index 12a741f62f0..bf495ecc382 100644 --- a/crates/vm/src/stdlib/thread.rs +++ b/crates/vm/src/stdlib/thread.rs @@ -891,7 +891,14 @@ pub(crate) mod _thread { let registry = vm.state.thread_frames.lock(); registry .iter() - .filter_map(|(id, slot)| slot.lock().last().cloned().map(|f| (*id, f))) + .filter_map(|(id, slot)| { + let frames = slot.frames.lock(); + // SAFETY: the owning thread can't pop while we hold the Mutex, + // so the FramePtr is valid for the duration of the lock. + frames + .last() + .map(|fp| (*id, unsafe { fp.as_ref() }.to_owned())) + }) .collect() } diff --git a/crates/vm/src/stdlib/winsound.rs b/crates/vm/src/stdlib/winsound.rs new file mode 100644 index 00000000000..3f65abbb890 --- /dev/null +++ b/crates/vm/src/stdlib/winsound.rs @@ -0,0 +1,206 @@ +// spell-checker:ignore pszSound fdwSound +#![allow(non_snake_case)] + +pub(crate) use winsound::module_def; + +mod win32 { + #[link(name = "winmm")] + unsafe extern "system" { + pub fn PlaySoundW(pszSound: *const u16, hmod: isize, fdwSound: u32) -> i32; + } + + unsafe extern "system" { + pub fn Beep(dwFreq: u32, dwDuration: u32) -> i32; + pub fn MessageBeep(uType: u32) -> i32; + } +} + +#[pymodule] +mod winsound { + use crate::builtins::{PyBytes, PyStr}; + use crate::common::windows::ToWideString; + use crate::convert::{IntoPyException, TryFromBorrowedObject}; + use crate::protocol::PyBuffer; + use crate::{AsObject, PyObjectRef, PyResult, VirtualMachine}; + + // PlaySound flags + #[pyattr] + const SND_SYNC: u32 = 0x0000; + #[pyattr] + const SND_ASYNC: u32 = 0x0001; + #[pyattr] + const SND_NODEFAULT: u32 = 0x0002; + #[pyattr] + const SND_MEMORY: u32 = 0x0004; + #[pyattr] + const SND_LOOP: u32 = 0x0008; + #[pyattr] + const SND_NOSTOP: u32 = 0x0010; + #[pyattr] + const SND_PURGE: u32 = 0x0040; + #[pyattr] + const SND_APPLICATION: u32 = 0x0080; + #[pyattr] + const SND_NOWAIT: u32 = 0x00002000; + #[pyattr] + const SND_ALIAS: u32 = 0x00010000; + #[pyattr] + const SND_FILENAME: u32 = 0x00020000; + #[pyattr] + const SND_SENTRY: u32 = 0x00080000; + #[pyattr] + const SND_SYSTEM: u32 = 0x00200000; + + // MessageBeep types + #[pyattr] + const MB_OK: u32 = 0x00000000; + #[pyattr] + const MB_ICONHAND: u32 = 0x00000010; + #[pyattr] + const MB_ICONQUESTION: u32 = 0x00000020; + #[pyattr] + const MB_ICONEXCLAMATION: u32 = 0x00000030; + #[pyattr] + const MB_ICONASTERISK: u32 = 0x00000040; + #[pyattr] + const MB_ICONERROR: u32 = MB_ICONHAND; + #[pyattr] + const MB_ICONSTOP: u32 = MB_ICONHAND; + #[pyattr] + const MB_ICONINFORMATION: u32 = MB_ICONASTERISK; + #[pyattr] + const MB_ICONWARNING: u32 = MB_ICONEXCLAMATION; + + #[derive(FromArgs)] + struct PlaySoundArgs { + #[pyarg(any)] + sound: PyObjectRef, + #[pyarg(any)] + flags: i32, + } + + #[pyfunction] + fn PlaySound(args: PlaySoundArgs, vm: &VirtualMachine) -> PyResult<()> { + let sound = args.sound; + let flags = args.flags as u32; + + if vm.is_none(&sound) { + let ok = unsafe { super::win32::PlaySoundW(core::ptr::null(), 0, flags) }; + if ok == 0 { + return Err(vm.new_runtime_error("Failed to play sound".to_owned())); + } + return Ok(()); + } + + if flags & SND_MEMORY != 0 { + if flags & SND_ASYNC != 0 { + return Err( + vm.new_runtime_error("Cannot play asynchronously from memory".to_owned()) + ); + } + let buffer = PyBuffer::try_from_borrowed_object(vm, &sound)?; + let buf = buffer.as_contiguous().ok_or_else(|| { + vm.new_type_error("a bytes-like object is required, not 'str'".to_owned()) + })?; + let ok = unsafe { super::win32::PlaySoundW(buf.as_ptr() as *const u16, 0, flags) }; + if ok == 0 { + return Err(vm.new_runtime_error("Failed to play sound".to_owned())); + } + return Ok(()); + } + + if sound.downcastable::() { + let type_name = sound.class().name().to_string(); + return Err(vm.new_type_error(format!( + "'sound' must be str, os.PathLike, or None, not {type_name}" + ))); + } + + // os.fspath(sound) + let path = match sound.downcast_ref::() { + Some(s) => s.as_str().to_owned(), + None => { + let fspath = vm.get_method_or_type_error( + sound.clone(), + identifier!(vm, __fspath__), + || { + let type_name = sound.class().name().to_string(); + format!("'sound' must be str, os.PathLike, or None, not {type_name}") + }, + )?; + + if vm.is_none(&fspath) { + return Err(vm.new_type_error(format!( + "'sound' must be str, os.PathLike, or None, not {}", + sound.class().name() + ))); + } + let result = fspath.call((), vm)?; + + if result.downcastable::() { + return Err( + vm.new_type_error("'sound' must resolve to str, not bytes".to_owned()) + ); + } + + let s: &PyStr = result.downcast_ref().ok_or_else(|| { + vm.new_type_error(format!( + "expected {}.__fspath__() to return str or bytes, not {}", + sound.class().name(), + result.class().name() + )) + })?; + + s.as_str().to_owned() + } + }; + + // Check for embedded null characters + if path.contains('\0') { + return Err(vm.new_value_error("embedded null character".to_owned())); + } + + let wide = path.to_wide_with_nul(); + let ok = unsafe { super::win32::PlaySoundW(wide.as_ptr(), 0, flags) }; + if ok == 0 { + return Err(vm.new_runtime_error("Failed to play sound".to_owned())); + } + Ok(()) + } + + #[derive(FromArgs)] + struct BeepArgs { + #[pyarg(any)] + frequency: i32, + #[pyarg(any)] + duration: i32, + } + + #[pyfunction] + fn Beep(args: BeepArgs, vm: &VirtualMachine) -> PyResult<()> { + if !(37..=32767).contains(&args.frequency) { + return Err(vm.new_value_error("frequency must be in 37 thru 32767".to_owned())); + } + + let ok = unsafe { super::win32::Beep(args.frequency as u32, args.duration as u32) }; + if ok == 0 { + return Err(vm.new_runtime_error("Failed to beep".to_owned())); + } + Ok(()) + } + + #[derive(FromArgs)] + struct MessageBeepArgs { + #[pyarg(any, default = 0)] + r#type: u32, + } + + #[pyfunction] + fn MessageBeep(args: MessageBeepArgs, vm: &VirtualMachine) -> PyResult<()> { + let ok = unsafe { super::win32::MessageBeep(args.r#type) }; + if ok == 0 { + return Err(std::io::Error::last_os_error().into_pyexception(vm)); + } + Ok(()) + } +} diff --git a/crates/vm/src/vm/compile.rs b/crates/vm/src/vm/compile.rs index 97f0f9e97b8..7294dc8f897 100644 --- a/crates/vm/src/vm/compile.rs +++ b/crates/vm/src/vm/compile.rs @@ -25,6 +25,337 @@ impl VirtualMachine { source_path: String, opts: CompileOpts, ) -> Result, CompileError> { - compiler::compile(source, mode, &source_path, opts).map(|code| self.ctx.new_code(code)) + let code = + compiler::compile(source, mode, &source_path, opts).map(|code| self.ctx.new_code(code)); + #[cfg(feature = "parser")] + if code.is_ok() { + self.emit_string_escape_warnings(source, &source_path); + } + code + } +} + +/// Scan source for invalid escape sequences in all string literals and emit +/// SyntaxWarning. +/// +/// Corresponds to: +/// - `warn_invalid_escape_sequence()` in `Parser/string_parser.c` +/// - `_PyTokenizer_warn_invalid_escape_sequence()` in `Parser/tokenizer/helpers.c` +#[cfg(feature = "parser")] +mod escape_warnings { + use super::*; + use crate::warn; + use ruff_python_ast::{self as ast, visitor::Visitor}; + use ruff_text_size::TextRange; + + /// Calculate 1-indexed line number at byte offset in source. + fn line_number_at(source: &str, offset: usize) -> usize { + source[..offset.min(source.len())] + .bytes() + .filter(|&b| b == b'\n') + .count() + + 1 + } + + /// Get content bounds (start, end byte offsets) of a quoted string literal, + /// excluding prefix characters and quote delimiters. + fn content_bounds(source: &str, range: TextRange) -> Option<(usize, usize)> { + let s = range.start().to_usize(); + let e = range.end().to_usize(); + if s >= e || e > source.len() { + return None; + } + let bytes = &source.as_bytes()[s..e]; + // Skip prefix (u, b, r, etc.) to find the first quote character. + let qi = bytes.iter().position(|&c| c == b'\'' || c == b'"')?; + let qc = bytes[qi]; + let ql = if bytes.get(qi + 1) == Some(&qc) && bytes.get(qi + 2) == Some(&qc) { + 3 + } else { + 1 + }; + let cs = s + qi + ql; + let ce = e.checked_sub(ql)?; + if cs <= ce { Some((cs, ce)) } else { None } + } + + /// Scan `source[start..end]` for the first invalid escape sequence. + /// Returns `Some((invalid_char, byte_offset_in_source))` for the first + /// invalid escape found, or `None` if all escapes are valid. + /// + /// When `is_bytes` is true, `\u`, `\U`, and `\N` are treated as invalid + /// (bytes literals only support byte-oriented escapes). + /// + /// Only reports the **first** invalid escape per string literal, matching + /// `_PyUnicode_DecodeUnicodeEscapeInternal2` which stores only the first + /// `first_invalid_escape_char`. + fn first_invalid_escape( + source: &str, + start: usize, + end: usize, + is_bytes: bool, + ) -> Option<(char, usize)> { + let raw = &source[start..end]; + let mut chars = raw.char_indices().peekable(); + while let Some((i, ch)) = chars.next() { + if ch != '\\' { + continue; + } + let Some((_, next)) = chars.next() else { + break; + }; + let valid = match next { + '\\' | '\'' | '"' | 'a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' => true, + '\n' => true, + '\r' => { + if matches!(chars.peek(), Some(&(_, '\n'))) { + chars.next(); + } + true + } + '0'..='7' => { + for _ in 0..2 { + if matches!(chars.peek(), Some(&(_, '0'..='7'))) { + chars.next(); + } else { + break; + } + } + true + } + 'x' | 'u' | 'U' => { + // \u and \U are only valid in string literals, not bytes + if is_bytes && next != 'x' { + false + } else { + let count = match next { + 'x' => 2, + 'u' => 4, + 'U' => 8, + _ => unreachable!(), + }; + for _ in 0..count { + if chars.peek().is_some_and(|&(_, c)| c.is_ascii_hexdigit()) { + chars.next(); + } else { + break; + } + } + true + } + } + 'N' => { + // \N{name} is only valid in string literals, not bytes + if is_bytes { + false + } else { + if matches!(chars.peek(), Some(&(_, '{'))) { + chars.next(); + for (_, c) in chars.by_ref() { + if c == '}' { + break; + } + } + } + true + } + } + _ => false, + }; + if !valid { + return Some((next, start + i)); + } + } + None + } + + /// Emit `SyntaxWarning` for an invalid escape sequence. + /// + /// `warn_invalid_escape_sequence()` in `Parser/string_parser.c` + fn warn_invalid_escape_sequence( + source: &str, + ch: char, + offset: usize, + filename: &str, + vm: &VirtualMachine, + ) { + let lineno = line_number_at(source, offset); + let message = vm.ctx.new_str(format!( + "\"\\{ch}\" is an invalid escape sequence. \ + Such sequences will not work in the future. \ + Did you mean \"\\\\{ch}\"? A raw string is also an option." + )); + let fname = vm.ctx.new_str(filename); + let _ = warn::warn_explicit( + Some(vm.ctx.exceptions.syntax_warning.to_owned()), + message.into(), + fname, + lineno, + None, + vm.ctx.none(), + None, + None, + vm, + ); + } + + struct EscapeWarningVisitor<'a> { + source: &'a str, + filename: &'a str, + vm: &'a VirtualMachine, + } + + impl<'a> EscapeWarningVisitor<'a> { + /// Check a quoted string/bytes literal for invalid escapes. + /// The range must include the prefix and quote delimiters. + fn check_quoted_literal(&self, range: TextRange, is_bytes: bool) { + if let Some((start, end)) = content_bounds(self.source, range) + && let Some((ch, offset)) = first_invalid_escape(self.source, start, end, is_bytes) + { + warn_invalid_escape_sequence(self.source, ch, offset, self.filename, self.vm); + } + } + + /// Check an f-string literal element for invalid escapes. + /// The range covers content only (no prefix/quotes). + /// + /// Also handles `\{` / `\}` at the literal–interpolation boundary, + /// equivalent to `_PyTokenizer_warn_invalid_escape_sequence` handling + /// `FSTRING_MIDDLE` / `FSTRING_END` tokens. + fn check_fstring_literal(&self, range: TextRange) { + let start = range.start().to_usize(); + let end = range.end().to_usize(); + if start >= end || end > self.source.len() { + return; + } + if let Some((ch, offset)) = first_invalid_escape(self.source, start, end, false) { + warn_invalid_escape_sequence(self.source, ch, offset, self.filename, self.vm); + return; + } + // In CPython, _PyTokenizer_warn_invalid_escape_sequence handles + // `\{` and `\}` for FSTRING_MIDDLE/FSTRING_END tokens. Ruff + // splits the literal element before the interpolation delimiter, + // so the `\` sits at the end of the literal range and the `{`/`}` + // sits just after it. Only warn when the number of trailing + // backslashes is odd (an even count means they are all escaped). + let trailing_bs = self.source.as_bytes()[start..end] + .iter() + .rev() + .take_while(|&&b| b == b'\\') + .count(); + if trailing_bs % 2 == 1 + && let Some(&after) = self.source.as_bytes().get(end) + && (after == b'{' || after == b'}') + { + warn_invalid_escape_sequence( + self.source, + after as char, + end - 1, + self.filename, + self.vm, + ); + } + } + + /// Visit f-string elements, checking literals and recursing into + /// interpolation expressions and format specs. + fn visit_fstring_elements(&mut self, elements: &'a ast::InterpolatedStringElements) { + for element in elements { + match element { + ast::InterpolatedStringElement::Literal(lit) => { + self.check_fstring_literal(lit.range); + } + ast::InterpolatedStringElement::Interpolation(interp) => { + self.visit_expr(&interp.expression); + if let Some(spec) = &interp.format_spec { + self.visit_fstring_elements(&spec.elements); + } + } + } + } + } + } + + impl<'a> Visitor<'a> for EscapeWarningVisitor<'a> { + fn visit_expr(&mut self, expr: &'a ast::Expr) { + match expr { + // Regular string literals — decode_unicode_with_escapes path + ast::Expr::StringLiteral(string) => { + for part in string.value.as_slice() { + if !matches!( + part.flags.prefix(), + ast::str_prefix::StringLiteralPrefix::Raw { .. } + ) { + self.check_quoted_literal(part.range, false); + } + } + } + // Byte string literals — decode_bytes_with_escapes path + ast::Expr::BytesLiteral(bytes) => { + for part in bytes.value.as_slice() { + if !matches!( + part.flags.prefix(), + ast::str_prefix::ByteStringPrefix::Raw { .. } + ) { + self.check_quoted_literal(part.range, true); + } + } + } + // F-string literals — tokenizer + string_parser paths + ast::Expr::FString(fstring_expr) => { + for part in fstring_expr.value.as_slice() { + match part { + ast::FStringPart::Literal(string_lit) => { + // Plain string part in f-string concatenation + if !matches!( + string_lit.flags.prefix(), + ast::str_prefix::StringLiteralPrefix::Raw { .. } + ) { + self.check_quoted_literal(string_lit.range, false); + } + } + ast::FStringPart::FString(fstring) => { + if matches!( + fstring.flags.prefix(), + ast::str_prefix::FStringPrefix::Raw { .. } + ) { + continue; + } + self.visit_fstring_elements(&fstring.elements); + } + } + } + } + _ => ast::visitor::walk_expr(self, expr), + } + } + } + + impl VirtualMachine { + /// Walk all string literals in `source` and emit `SyntaxWarning` for + /// each that contains an invalid escape sequence. + pub(super) fn emit_string_escape_warnings(&self, source: &str, filename: &str) { + let Ok(parsed) = + ruff_python_parser::parse(source, ruff_python_parser::Mode::Module.into()) + else { + return; + }; + let ast = parsed.into_syntax(); + let mut visitor = EscapeWarningVisitor { + source, + filename, + vm: self, + }; + match ast { + ast::Mod::Module(module) => { + for stmt in &module.body { + visitor.visit_stmt(stmt); + } + } + ast::Mod::Expression(expr) => { + visitor.visit_expr(&expr.body); + } + } + } } } diff --git a/crates/vm/src/vm/mod.rs b/crates/vm/src/vm/mod.rs index 10409d943b3..07395b80460 100644 --- a/crates/vm/src/vm/mod.rs +++ b/crates/vm/src/vm/mod.rs @@ -41,7 +41,8 @@ use crate::{ }; use alloc::{borrow::Cow, collections::BTreeMap}; use core::{ - cell::{Cell, OnceCell, Ref, RefCell}, + cell::{Cell, OnceCell, RefCell}, + ptr::NonNull, sync::atomic::{AtomicBool, Ordering}, }; use crossbeam_utils::atomic::AtomicCell; @@ -72,7 +73,7 @@ pub struct VirtualMachine { pub builtins: PyRef, pub sys_module: PyRef, pub ctx: PyRc, - pub frames: RefCell>, + pub frames: RefCell>, pub wasm_id: Option, exceptions: RefCell, pub import_func: PyObjectRef, @@ -99,10 +100,26 @@ pub struct VirtualMachine { pub asyncio_running_task: RefCell>, } +/// Non-owning frame pointer for the frames stack. +/// The pointed-to frame is kept alive by the caller of with_frame_exc/resume_gen_frame. +#[derive(Copy, Clone)] +pub struct FramePtr(NonNull>); + +impl FramePtr { + /// # Safety + /// The pointed-to frame must still be alive. + pub unsafe fn as_ref(&self) -> &Py { + unsafe { self.0.as_ref() } + } +} + +// SAFETY: FramePtr is only stored in the VM's frames Vec while the corresponding +// FrameRef is alive on the call stack. The Vec is always empty when the VM moves between threads. +unsafe impl Send for FramePtr {} + #[derive(Debug, Default)] struct ExceptionStack { - exc: Option, - prev: Option>, + stack: Vec>, } pub struct PyGlobalState { @@ -129,7 +146,7 @@ pub struct PyGlobalState { /// Main thread identifier (pthread_self on Unix) #[cfg(feature = "threading")] pub main_thread_ident: AtomicCell, - /// Registry of all threads' current frames for sys._current_frames() + /// Registry of all threads' slots for sys._current_frames() and sys._current_exceptions() #[cfg(feature = "threading")] pub thread_frames: parking_lot::Mutex>, /// Registry of all ThreadHandles for fork cleanup @@ -997,31 +1014,55 @@ impl VirtualMachine { &self, frame: FrameRef, f: F, + ) -> PyResult { + self.with_frame_exc(frame, None, f) + } + + /// Like `with_frame` but allows specifying the initial exception state. + pub fn with_frame_exc PyResult>( + &self, + frame: FrameRef, + exc: Option, + f: F, ) -> PyResult { self.with_recursion("", || { - self.frames.borrow_mut().push(frame.clone()); + // SAFETY: `frame` (FrameRef) stays alive for the entire closure scope, + // keeping the FramePtr valid. We pass a clone to `f` so that `f` + // consuming its FrameRef doesn't invalidate our pointer. + let fp = FramePtr(NonNull::from(&*frame)); + self.frames.borrow_mut().push(fp); // Update the shared frame stack for sys._current_frames() and faulthandler #[cfg(feature = "threading")] - crate::vm::thread::push_thread_frame(frame.clone()); + crate::vm::thread::push_thread_frame(fp); // Link frame into the signal-safe frame chain (previous pointer) - let frame_ptr: *const Frame = &**frame; - let old_frame = crate::vm::thread::set_current_frame(frame_ptr); + let old_frame = crate::vm::thread::set_current_frame((&**frame) as *const Frame); frame.previous.store( old_frame as *mut Frame, core::sync::atomic::Ordering::Relaxed, ); - // Push a new exception context for frame isolation - // Each frame starts with no active exception (None) - // This prevents exceptions from leaking between function calls - self.push_exception(None); + // Push exception context for frame isolation. + // For normal calls: None (clean slate). + // For generators: the saved exception from last yield. + self.push_exception(exc); let old_owner = frame.owner.swap( crate::frame::FrameOwner::Thread as i8, core::sync::atomic::Ordering::AcqRel, ); + + // Ensure cleanup on panic: restore owner, pop exception, frame chain, and frames Vec. + scopeguard::defer! { + frame.owner.store(old_owner, core::sync::atomic::Ordering::Release); + self.pop_exception(); + crate::vm::thread::set_current_frame(old_frame); + self.frames.borrow_mut().pop(); + #[cfg(feature = "threading")] + crate::vm::thread::pop_thread_frame(); + } + use crate::protocol::TraceEvent; // Fire 'call' trace event after pushing frame // (current_frame() now returns the callee's frame) - let result = match self.trace_event(TraceEvent::Call, None) { + match self.trace_event(TraceEvent::Call, None) { Ok(()) => { // Set per-frame trace function so line events fire for this frame. // Frames entered before sys.settrace() keep trace=None and skip line events. @@ -1031,7 +1072,7 @@ impl VirtualMachine { *frame.trace.lock() = trace_func; } } - let result = f(frame); + let result = f(frame.clone()); // Fire 'return' trace event on success if result.is_ok() { let _ = self.trace_event(TraceEvent::Return, None); @@ -1039,23 +1080,67 @@ impl VirtualMachine { result } Err(e) => Err(e), - }; - // SAFETY: frame_ptr is valid because self.frames holds a clone - // of the frame, keeping the underlying allocation alive. - unsafe { &*frame_ptr } - .owner - .store(old_owner, core::sync::atomic::Ordering::Release); - // Pop the exception context - restores caller's exception state - self.pop_exception(); - // Restore previous frame as current (unlink from chain) + } + }) + } + + /// Lightweight frame execution for generator/coroutine resume. + /// Pushes to the thread frame stack and fires trace/profile events, + /// but skips the thread exception update for performance. + pub fn resume_gen_frame) -> PyResult>( + &self, + frame: &FrameRef, + exc: Option, + f: F, + ) -> PyResult { + self.check_recursive_call("")?; + if self.check_c_stack_overflow() { + return Err(self.new_recursion_error(String::new())); + } + self.recursion_depth.update(|d| d + 1); + + // SAFETY: frame (&FrameRef) stays alive for the duration, so NonNull is valid until pop. + let fp = FramePtr(NonNull::from(&**frame)); + self.frames.borrow_mut().push(fp); + #[cfg(feature = "threading")] + crate::vm::thread::push_thread_frame(fp); + let old_frame = crate::vm::thread::set_current_frame((&***frame) as *const Frame); + frame.previous.store( + old_frame as *mut Frame, + core::sync::atomic::Ordering::Relaxed, + ); + // Inline exception push without thread exception update + self.exceptions.borrow_mut().stack.push(exc); + let old_owner = frame.owner.swap( + crate::frame::FrameOwner::Thread as i8, + core::sync::atomic::Ordering::AcqRel, + ); + + // Ensure cleanup on panic: restore owner, pop exception, frame chain, frames Vec, + // and recursion depth. + scopeguard::defer! { + frame.owner.store(old_owner, core::sync::atomic::Ordering::Release); + self.exceptions.borrow_mut().stack + .pop() + .expect("pop_exception() without nested exc stack"); crate::vm::thread::set_current_frame(old_frame); - // defer dec frame - let _popped = self.frames.borrow_mut().pop(); - // Pop from shared frame stack + self.frames.borrow_mut().pop(); #[cfg(feature = "threading")] crate::vm::thread::pop_thread_frame(); - result - }) + self.recursion_depth.update(|d| d - 1); + } + + use crate::protocol::TraceEvent; + match self.trace_event(TraceEvent::Call, None) { + Ok(()) => { + let result = f(frame); + if result.is_ok() { + let _ = self.trace_event(TraceEvent::Return, None); + } + result + } + Err(e) => Err(e), + } } /// Returns a basic CompileOpts instance with options accurate to the vm. Used @@ -1077,15 +1162,11 @@ impl VirtualMachine { } } - pub fn current_frame(&self) -> Option> { - let frames = self.frames.borrow(); - if frames.is_empty() { - None - } else { - Some(Ref::map(self.frames.borrow(), |frames| { - frames.last().unwrap() - })) - } + pub fn current_frame(&self) -> Option { + self.frames.borrow().last().map(|fp| { + // SAFETY: the caller keeps the FrameRef alive while it's in the Vec + unsafe { fp.as_ref() }.to_owned() + }) } pub fn current_locals(&self) -> PyResult { @@ -1094,11 +1175,11 @@ impl VirtualMachine { .locals(self) } - pub fn current_globals(&self) -> Ref<'_, PyDictRef> { - let frame = self - .current_frame() - .expect("called current_globals but no frames on the stack"); - Ref::map(frame, |f| &f.globals) + pub fn current_globals(&self) -> PyDictRef { + self.current_frame() + .expect("called current_globals but no frames on the stack") + .globals + .clone() } pub fn try_class(&self, module: &'static str, class: &'static str) -> PyResult { @@ -1351,27 +1432,44 @@ impl VirtualMachine { } pub(crate) fn push_exception(&self, exc: Option) { - let mut excs = self.exceptions.borrow_mut(); - let prev = core::mem::take(&mut *excs); - excs.prev = Some(Box::new(prev)); - excs.exc = exc + self.exceptions.borrow_mut().stack.push(exc); + #[cfg(feature = "threading")] + thread::update_thread_exception(self.topmost_exception()); } pub(crate) fn pop_exception(&self) -> Option { - let mut excs = self.exceptions.borrow_mut(); - let cur = core::mem::take(&mut *excs); - *excs = *cur.prev.expect("pop_exception() without nested exc stack"); - cur.exc + let exc = self + .exceptions + .borrow_mut() + .stack + .pop() + .expect("pop_exception() without nested exc stack"); + #[cfg(feature = "threading")] + thread::update_thread_exception(self.topmost_exception()); + exc } pub(crate) fn current_exception(&self) -> Option { - self.exceptions.borrow().exc.clone() + self.exceptions.borrow().stack.last().cloned().flatten() } pub(crate) fn set_exception(&self, exc: Option) { // don't be holding the RefCell guard while __del__ is called - let prev = core::mem::replace(&mut self.exceptions.borrow_mut().exc, exc); - drop(prev); + let mut excs = self.exceptions.borrow_mut(); + debug_assert!( + !excs.stack.is_empty(), + "set_exception called with empty exception stack" + ); + if let Some(top) = excs.stack.last_mut() { + let prev = core::mem::replace(top, exc); + drop(excs); + drop(prev); + } else { + excs.stack.push(exc); + drop(excs); + } + #[cfg(feature = "threading")] + thread::update_thread_exception(self.topmost_exception()); } pub(crate) fn contextualize_exception(&self, exception: &Py) { @@ -1404,13 +1502,7 @@ impl VirtualMachine { pub(crate) fn topmost_exception(&self) -> Option { let excs = self.exceptions.borrow(); - let mut cur = &*excs; - loop { - if let Some(exc) = &cur.exc { - return Some(exc.clone()); - } - cur = cur.prev.as_deref()?; - } + excs.stack.iter().rev().find_map(|e| e.clone()) } pub fn handle_exit_exception(&self, exc: PyBaseExceptionRef) -> u32 { diff --git a/crates/vm/src/vm/python_run.rs b/crates/vm/src/vm/python_run.rs index e651b34cc50..70d845b03f5 100644 --- a/crates/vm/src/vm/python_run.rs +++ b/crates/vm/src/vm/python_run.rs @@ -1,7 +1,8 @@ //! Python code execution functions. use crate::{ - PyResult, VirtualMachine, + AsObject, PyRef, PyResult, VirtualMachine, + builtins::PyCode, compiler::{self}, scope::Scope, }; @@ -22,9 +23,22 @@ impl VirtualMachine { let code_obj = self .compile(source, compiler::Mode::Exec, source_path) .map_err(|err| self.new_syntax_error(&err, Some(source)))?; + // linecache._register_code(code, source, filename) + let _ = self.register_code_in_linecache(&code_obj, source); self.run_code_obj(code_obj, scope) } + /// Register a code object's source in linecache._interactive_cache + /// so that traceback can display source lines and caret indicators. + fn register_code_in_linecache(&self, code: &PyRef, source: &str) -> PyResult<()> { + let linecache = self.import("linecache", 0)?; + let register = linecache.get_attr("_register_code", self)?; + let source_str = self.ctx.new_str(source); + let filename = self.ctx.new_str(code.source_path().as_str()); + register.call((code.as_object().to_owned(), source_str, filename), self)?; + Ok(()) + } + #[deprecated(note = "use run_string instead")] pub fn run_code_string(&self, scope: Scope, source: &str, source_path: String) -> PyResult { self.run_string(scope, source, source_path) diff --git a/crates/vm/src/vm/thread.rs b/crates/vm/src/vm/thread.rs index af69fa8d8e5..575910f7900 100644 --- a/crates/vm/src/vm/thread.rs +++ b/crates/vm/src/vm/thread.rs @@ -1,6 +1,8 @@ -use crate::frame::Frame; #[cfg(feature = "threading")] -use crate::frame::FrameRef; +use super::FramePtr; +#[cfg(feature = "threading")] +use crate::builtins::PyBaseExceptionRef; +use crate::frame::Frame; use crate::{AsObject, PyObject, VirtualMachine}; #[cfg(feature = "threading")] use alloc::sync::Arc; @@ -12,20 +14,27 @@ use core::{ use itertools::Itertools; use std::thread_local; -/// Type for current frame slot - shared between threads for sys._current_frames() -/// Stores the full frame stack so faulthandler can dump complete tracebacks -/// for all threads. +/// Per-thread shared state for sys._current_frames() and sys._current_exceptions(). +/// The exception field uses atomic operations for lock-free cross-thread reads. +#[cfg(feature = "threading")] +pub struct ThreadSlot { + /// Raw frame pointers, valid while the owning thread's call stack is active. + /// Readers must hold the Mutex and convert to FrameRef inside the lock. + pub frames: parking_lot::Mutex>, + pub exception: crate::PyAtomicRef>, +} + #[cfg(feature = "threading")] -pub type CurrentFrameSlot = Arc>>; +pub type CurrentFrameSlot = Arc; thread_local! { pub(super) static VM_STACK: RefCell>> = Vec::with_capacity(1).into(); pub(crate) static COROUTINE_ORIGIN_TRACKING_DEPTH: Cell = const { Cell::new(0) }; - /// Current thread's frame slot for sys._current_frames() + /// Current thread's slot for sys._current_frames() and sys._current_exceptions() #[cfg(feature = "threading")] - static CURRENT_FRAME_SLOT: RefCell> = const { RefCell::new(None) }; + static CURRENT_THREAD_SLOT: RefCell> = const { RefCell::new(None) }; /// Current top frame for signal-safe traceback walking. /// Mirrors `PyThreadState.current_frame`. Read by faulthandler's signal @@ -49,23 +58,26 @@ pub fn enter_vm(vm: &VirtualMachine, f: impl FnOnce() -> R) -> R { VM_STACK.with(|vms| { vms.borrow_mut().push(vm.into()); - // Initialize frame slot for this thread if not already done + // Initialize thread slot for this thread if not already done #[cfg(feature = "threading")] - init_frame_slot_if_needed(vm); + init_thread_slot_if_needed(vm); scopeguard::defer! { vms.borrow_mut().pop(); } VM_CURRENT.set(vm, f) }) } -/// Initialize frame slot for current thread if not already initialized. +/// Initialize thread slot for current thread if not already initialized. /// Called automatically by enter_vm(). #[cfg(feature = "threading")] -fn init_frame_slot_if_needed(vm: &VirtualMachine) { - CURRENT_FRAME_SLOT.with(|slot| { +fn init_thread_slot_if_needed(vm: &VirtualMachine) { + CURRENT_THREAD_SLOT.with(|slot| { if slot.borrow().is_none() { let thread_id = crate::stdlib::thread::get_ident(); - let new_slot = Arc::new(parking_lot::Mutex::new(Vec::new())); + let new_slot = Arc::new(ThreadSlot { + frames: parking_lot::Mutex::new(Vec::new()), + exception: crate::PyAtomicRef::from(None::), + }); vm.state .thread_frames .lock() @@ -75,13 +87,18 @@ fn init_frame_slot_if_needed(vm: &VirtualMachine) { }); } -/// Push a frame onto the current thread's shared frame stack. -/// Called when a new frame is entered. +/// Push a frame pointer onto the current thread's shared frame stack. +/// The pointed-to frame must remain alive until the matching pop. #[cfg(feature = "threading")] -pub fn push_thread_frame(frame: FrameRef) { - CURRENT_FRAME_SLOT.with(|slot| { +pub fn push_thread_frame(fp: FramePtr) { + CURRENT_THREAD_SLOT.with(|slot| { if let Some(s) = slot.borrow().as_ref() { - s.lock().push(frame); + s.frames.lock().push(fp); + } else { + debug_assert!( + false, + "push_thread_frame called without initialized thread slot" + ); } }); } @@ -90,9 +107,14 @@ pub fn push_thread_frame(frame: FrameRef) { /// Called when a frame is exited. #[cfg(feature = "threading")] pub fn pop_thread_frame() { - CURRENT_FRAME_SLOT.with(|slot| { + CURRENT_THREAD_SLOT.with(|slot| { if let Some(s) = slot.borrow().as_ref() { - s.lock().pop(); + s.frames.lock().pop(); + } else { + debug_assert!( + false, + "pop_thread_frame called without initialized thread slot" + ); } }); } @@ -109,25 +131,51 @@ pub fn get_current_frame() -> *const Frame { CURRENT_FRAME.with(|c| c.load(Ordering::Relaxed) as *const Frame) } -/// Cleanup frame tracking for the current thread. Called at thread exit. +/// Update the current thread's exception slot atomically (no locks). +/// Called from push_exception/pop_exception/set_exception. +#[cfg(feature = "threading")] +pub fn update_thread_exception(exc: Option) { + CURRENT_THREAD_SLOT.with(|slot| { + if let Some(s) = slot.borrow().as_ref() { + // SAFETY: Called only from the owning thread. The old ref is dropped + // here on the owning thread, which is safe. + let _old = unsafe { s.exception.swap(exc) }; + } + }); +} + +/// Collect all threads' current exceptions for sys._current_exceptions(). +/// Acquires the global registry lock briefly, then reads each slot's exception atomically. +#[cfg(feature = "threading")] +pub fn get_all_current_exceptions(vm: &VirtualMachine) -> Vec<(u64, Option)> { + let registry = vm.state.thread_frames.lock(); + registry + .iter() + .map(|(id, slot)| (*id, slot.exception.to_owned())) + .collect() +} + +/// Cleanup thread slot for the current thread. Called at thread exit. #[cfg(feature = "threading")] pub fn cleanup_current_thread_frames(vm: &VirtualMachine) { let thread_id = crate::stdlib::thread::get_ident(); vm.state.thread_frames.lock().remove(&thread_id); - CURRENT_FRAME_SLOT.with(|s| { + CURRENT_THREAD_SLOT.with(|s| { *s.borrow_mut() = None; }); } -/// Reinitialize frame slot after fork. Called in child process. +/// Reinitialize thread slot after fork. Called in child process. /// Creates a fresh slot and registers it for the current thread, /// preserving the current thread's frames from `vm.frames`. #[cfg(feature = "threading")] pub fn reinit_frame_slot_after_fork(vm: &VirtualMachine) { let current_ident = crate::stdlib::thread::get_ident(); - // Preserve the current thread's frames across fork - let current_frames: Vec = vm.frames.borrow().clone(); - let new_slot = Arc::new(parking_lot::Mutex::new(current_frames)); + let current_frames: Vec = vm.frames.borrow().clone(); + let new_slot = Arc::new(ThreadSlot { + frames: parking_lot::Mutex::new(current_frames), + exception: crate::PyAtomicRef::from(vm.topmost_exception()), + }); // After fork, only the current thread exists. If the lock was held by // another thread during fork, force unlock it. @@ -144,8 +192,7 @@ pub fn reinit_frame_slot_after_fork(vm: &VirtualMachine) { registry.insert(current_ident, new_slot.clone()); drop(registry); - // Update thread-local to point to the new slot - CURRENT_FRAME_SLOT.with(|s| { + CURRENT_THREAD_SLOT.with(|s| { *s.borrow_mut() = Some(new_slot); }); } diff --git a/crates/vm/src/vm/vm_new.rs b/crates/vm/src/vm/vm_new.rs index 7c6035b62d1..a67c0636614 100644 --- a/crates/vm/src/vm/vm_new.rs +++ b/crates/vm/src/vm/vm_new.rs @@ -503,11 +503,52 @@ impl VirtualMachine { } let mut narrow_caret = false; match error { + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: + ruff_python_parser::ParseErrorType::FStringError( + ruff_python_parser::InterpolatedStringErrorType::UnterminatedString, + ) + | ruff_python_parser::ParseErrorType::Lexical( + ruff_python_parser::LexicalErrorType::FStringError( + ruff_python_parser::InterpolatedStringErrorType::UnterminatedString, + ), + ), + .. + }) => { + msg = "unterminated f-string literal".to_owned(); + } + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: + ruff_python_parser::ParseErrorType::FStringError( + ruff_python_parser::InterpolatedStringErrorType::UnterminatedTripleQuotedString, + ) + | ruff_python_parser::ParseErrorType::Lexical( + ruff_python_parser::LexicalErrorType::FStringError( + ruff_python_parser::InterpolatedStringErrorType::UnterminatedTripleQuotedString, + ), + ), + .. + }) => { + msg = "unterminated triple-quoted f-string literal".to_owned(); + } #[cfg(feature = "parser")] crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { error: ruff_python_parser::ParseErrorType::FStringError(_) - | ruff_python_parser::ParseErrorType::UnexpectedExpressionToken, + | ruff_python_parser::ParseErrorType::Lexical( + ruff_python_parser::LexicalErrorType::FStringError(_), + ), + .. + }) => { + // Replace backticks with single quotes to match CPython's error messages + msg = msg.replace('`', "'"); + msg.insert_str(0, "invalid syntax: "); + } + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: ruff_python_parser::ParseErrorType::UnexpectedExpressionToken, .. }) => msg.insert_str(0, "invalid syntax: "), #[cfg(feature = "parser")] @@ -532,6 +573,47 @@ impl VirtualMachine { msg = "invalid syntax".to_owned(); narrow_caret = true; } + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: ruff_python_parser::ParseErrorType::InvalidDeleteTarget, + .. + }) => { + msg = "invalid syntax".to_owned(); + } + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: + ruff_python_parser::ParseErrorType::Lexical( + ruff_python_parser::LexicalErrorType::LineContinuationError, + ), + .. + }) => { + msg = "unexpected character after line continuation".to_owned(); + } + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: + ruff_python_parser::ParseErrorType::Lexical( + ruff_python_parser::LexicalErrorType::UnclosedStringError, + ), + .. + }) => { + msg = "unterminated string".to_owned(); + } + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: ruff_python_parser::ParseErrorType::OtherError(s), + .. + }) if s.eq_ignore_ascii_case("bytes literal cannot be mixed with non-bytes literals") => { + msg = "cannot mix bytes and nonbytes literals".to_owned(); + } + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: ruff_python_parser::ParseErrorType::OtherError(s), + .. + }) if s.starts_with("Expected an identifier, but found a keyword") => { + msg = "invalid syntax".to_owned(); + } _ => {} } if syntax_error_type.is(self.ctx.exceptions.tab_error) { diff --git a/crates/vm/src/warn.rs b/crates/vm/src/warn.rs index b4406ff5246..684630e6af0 100644 --- a/crates/vm/src/warn.rs +++ b/crates/vm/src/warn.rs @@ -551,7 +551,7 @@ fn setup_context( skip_file_prefixes: Option<&PyTupleRef>, vm: &VirtualMachine, ) -> PyResult<(PyStrRef, usize, Option, PyObjectRef)> { - let mut f = vm.current_frame().as_deref().cloned(); + let mut f = vm.current_frame(); // Stack level comparisons to Python code is off by one as there is no // warnings-related stack level to avoid. diff --git a/extra_tests/snippets/syntax_short_circuit_bool.py b/extra_tests/snippets/syntax_short_circuit_bool.py index 76d89352cbb..6cbae190cae 100644 --- a/extra_tests/snippets/syntax_short_circuit_bool.py +++ b/extra_tests/snippets/syntax_short_circuit_bool.py @@ -31,3 +31,6 @@ def __bool__(self): # if ExplodingBool(False) and False and True and False: # pass + +# Issue #3567: nested BoolOps should not call __bool__ redundantly +assert (ExplodingBool(False) and False or False) == False