Skip to content

Lua字符串操作

概述

字符串是Lua中最重要的数据类型之一。Lua提供了丰富的字符串操作函数,支持字符串创建、修改、查找、替换等各种操作。

1. 字符串创建

1.1 基本字符串创建

lua
-- Double-quoted literal
local double_quoted = "Hello, World!"

-- Single-quoted literal (equivalent to double quotes)
local single_quoted = 'Hello, Lua!'

-- Long-bracket literal: may span several lines; a newline that directly
-- follows the opening [[ is not part of the string
local multiline = [[
这是一个多行字符串
可以包含换行符
和其他特殊字符
]]

-- Leveled long brackets: the text may contain ]] because only the
-- matching ]==] terminates it
local leveled = [==[
这个字符串可以包含 ]] 而不会结束
因为使用了 [==[ 和 ]==] 作为分隔符
]==]

-- Print the four literals in declaration order
for _, literal in ipairs({ double_quoted, single_quoted, multiline, leveled }) do
    print(literal)
end

1.2 转义字符

lua
-- Escape sequences are interpreted inside quoted literals
local with_escapes = "Hello\nWorld\t!"
print(with_escapes)
-- Output:
-- Hello
-- World	!

-- Each entry doubles the backslash, so the escape sequence is printed
-- literally instead of being interpreted
local escape_notes = {
    "换行符: \\n",
    "制表符: \\t",
    "回车符: \\r",
    "反斜杠: \\\\",
    "双引号: \\\"",
    "单引号: \\'",
    "响铃: \\a",
    "退格: \\b",
    "垂直制表符: \\v",
    "换页符: \\f"
}

for index = 1, #escape_notes do
    print(escape_notes[index])
end

-- Numeric escapes
local from_decimal = "\065"  -- \ddd takes a DECIMAL code in Lua (not octal): 65 is 'A'
local from_hex = "\x41"      -- \xXX takes two hexadecimal digits (Lua 5.2+): 0x41 is 'A'
print(from_decimal, from_hex)  -- A A

2. 字符串基本操作

2.1 字符串长度

lua
local str = "Hello, Lua!"

-- The # operator returns the length in bytes
print(#str)              -- 11

-- string.len returns the same value
print(string.len(str))   -- 11

-- For UTF-8 text, # still counts bytes rather than characters
local utf8_str = "你好,世界!"
print(#utf8_str)         -- byte length
-- The utf8 library only exists in Lua 5.3+; guard the call so the snippet
-- also runs on Lua 5.1/5.2/LuaJIT instead of indexing a nil global
if utf8 then
    print(utf8.len(utf8_str)) -- character count
end

2.2 字符串连接

lua
-- The .. operator concatenates strings
local greeting_word = "Hello"
local target_word = "World"
local sentence = greeting_word .. ", " .. target_word .. "!"
print(sentence)  -- Hello, World!

-- Numbers are converted to strings automatically by ..
local answer = 42
local message = "The answer is " .. answer
print(message)  -- The answer is 42

-- For many pieces, collect them in a table and join once with table.concat
local pieces = {}
for index = 1, 1000 do
    pieces[#pieces + 1] = "string" .. index
end
local joined = table.concat(pieces, ", ")
print(#joined)

3. 字符串查找和匹配

3.1 string.find - 查找子字符串

lua
local haystack = "Hello, Lua programming!"

-- Plain search: returns the start and end index of the first match
local first_index, last_index = string.find(haystack, "Lua")
print(first_index, last_index)  -- 8 10

-- The third argument is the index at which the search starts (inclusive).
-- Index 5 is itself an "o", so that position is the match.
local o_index = string.find(haystack, "o", 5)
print(o_index)  -- 5

-- A substring that is not present yields nil
local missing = string.find(haystack, "Python")
print(missing)  -- nil

-- The needle is a Lua pattern: %d matches the first digit
local first_digit = string.find("abc123def", "%d")
print(first_digit)  -- 4

3.2 string.match - 模式匹配

lua
local price_text = "The price is $25.99"

-- Decimal number: digits, a literal dot, digits
local amount = string.match(price_text, "%d+%.%d+")
print(amount)  -- 25.99

-- First run of alphanumeric characters
local first_word = string.match(price_text, "%w+")
print(first_word)  -- The

-- string.match stops at the first match, so only the domain of the first
-- address is captured
local contact_line = "Contact us at info@example.com or support@test.org"
local first_domain = string.match(contact_line, "@([%w%.]+)")
print(first_domain)  -- example.com

3.3 string.gmatch - 全局匹配迭代器

lua
local fruit_line = "apple banana cherry date"

-- Visit every word
for fruit in string.gmatch(fruit_line, "%w+") do
    print(fruit)
end
-- Output (one per line): apple, banana, cherry, date

-- Visit every run of digits
local sentence = "There are 5 apples, 10 bananas, and 3 oranges"
for digits in string.gmatch(sentence, "%d+") do
    print("Number:", digits)
end
-- Output: 5, 10, 3

-- Two captures per match yield the key and the value
local raw_settings = "name=John age=30 city=Beijing"
local parsed_settings = {}
for name, text_value in string.gmatch(raw_settings, "(%w+)=(%w+)") do
    parsed_settings[name] = text_value
end

-- pairs does not guarantee any particular order
for name, text_value in pairs(parsed_settings) do
    print(name, text_value)
end

4. 字符串替换

4.1 string.gsub - 全局替换

lua
local greeting = "Hello World, Hello Lua!"

-- Replace every occurrence; only the first return value is kept here
local all_replaced = string.gsub(greeting, "Hello", "Hi")
print(all_replaced)  -- Hi World, Hi Lua!

-- A fourth argument caps the number of replacements; the second return
-- value is the number of replacements made
local once_replaced, replacements_made = string.gsub(greeting, "Hello", "Hi", 1)
print(once_replaced)      -- Hi World, Hello Lua!
print(replacements_made)  -- 1

-- Captures can be referenced in the replacement string as %1, %2, ...
local raw_phone = "123-456-7890"
local pretty_phone = string.gsub(raw_phone, "(%d%d%d)-(%d%d%d)-(%d%d%d%d)", "(%1) %2-%3")
print(pretty_phone)  -- (123) 456-7890

-- A function replacement receives each match and returns its substitute
local inventory = "I have 5 apples and 10 oranges"
local function double_number(digits)
    return tostring(tonumber(digits) * 2)
end
local doubled_inventory = string.gsub(inventory, "%d+", double_number)
print(doubled_inventory)  -- I have 10 apples and 20 oranges

4.2 使用表进行替换

lua
local greeting_template = "Hello {name}, you are {age} years old"
local field_values = {
    name = "Alice",
    age = "30"
}

-- With a table as the replacement, the first capture is the lookup key
local filled = string.gsub(greeting_template, "{(%w+)}", field_values)
print(filled)  -- Hello Alice, you are 30 years old

-- Fills {key} placeholders in `template` with entries from `values`.
-- A placeholder whose key has no truthy entry in `values` is left as is.
-- Returns both results of string.gsub: the filled string and the number
-- of placeholders that were matched.
local function template_replace(template, values)
    local function lookup(key)
        local value = values[key]
        if value then
            return value
        end
        return "{" .. key .. "}"
    end

    return string.gsub(template, "{([^}]+)}", lookup)
end

local page_template = "<h1>{title}</h1><p>{content}</p><span>{unknown}</span>"
local page_fields = {
    title = "Welcome",
    content = "This is a test page"
}

-- Assigning to a single local keeps only the filled string
local page = template_replace(page_template, page_fields)
print(page)  -- <h1>Welcome</h1><p>This is a test page</p><span>{unknown}</span>

5. 字符串截取和分割

5.1 string.sub - 字符串截取

lua
local phrase = "Hello, Lua programming!"

-- Positive indices count from the start (1-based, both ends inclusive)
print(string.sub(phrase, 1, 5))    -- Hello
print(string.sub(phrase, 8, 10))   -- Lua
print(string.sub(phrase, 8))       -- Lua programming!

-- Negative indices count back from the end of the string
print(string.sub(phrase, -12))     -- programming!
print(string.sub(phrase, -12, -2)) -- programming

-- Indices outside the string are clamped to its bounds
print(string.sub(phrase, -100, 5)) -- Hello
print(string.sub(phrase, 10, 100)) -- a programming!

5.2 字符串分割

lua
-- Splits `str` at every character that appears in `delimiter` and returns
-- the non-empty pieces as an array. Empty fields (for example between two
-- adjacent delimiters) are skipped.
-- @param str        the string to split
-- @param delimiter  one or more separator characters, taken literally
-- @return array of the pieces, in order
function split(str, delimiter)
    -- Escape every non-alphanumeric character so the delimiter is taken
    -- literally inside the [^...] set. Without this, a "%" delimiter
    -- produces a malformed pattern and "a-c" is read as a character range.
    local escaped = string.gsub(delimiter, "%W", "%%%0")
    local pattern = "([^" .. escaped .. "]+)"

    local result = {}
    for piece in string.gmatch(str, pattern) do
        result[#result + 1] = piece
    end

    return result
end

local csv = "apple,banana,cherry,date"
local fruits = split(csv, ",")
for i, fruit in ipairs(fruits) do
    print(i, fruit)
end

-- Splits `str` on the literal (plain-text) `delimiter`, keeping empty
-- fields.
-- @param str         the string to split
-- @param delimiter   separator text; must not be empty
-- @param max_splits  optional maximum number of splits; once reached, the
--                    rest of the string is returned as the last element
-- @return array of the pieces, in order
-- Raises an error when `delimiter` is the empty string.
function advanced_split(str, delimiter, max_splits)
    -- A plain find of "" matches at every position without advancing, so
    -- the loop below would never terminate.
    if delimiter == "" then
        error("advanced_split: delimiter must not be empty", 2)
    end

    local result = {}
    local start = 1
    local splits = 0

    while true do
        local pos = string.find(str, delimiter, start, true)  -- plain-text search

        if not pos or (max_splits and splits >= max_splits) then
            -- No further delimiter, or split budget used up: keep the remainder
            result[#result + 1] = string.sub(str, start)
            break
        end

        result[#result + 1] = string.sub(str, start, pos - 1)
        start = pos + #delimiter
        splits = splits + 1
    end

    return result
end

local text = "one::two::three::four"
local parts = advanced_split(text, "::", 2)
for i, part in ipairs(parts) do
    print(i, part)
end
-- Output: 1 one, 2 two, 3 three::four

6. 字符串格式化

6.1 string.format - 格式化字符串

lua
-- %s inserts a string, %d an integer
local person_name = "Alice"
local person_age = 30
local summary = string.format("Name: %s, Age: %d", person_name, person_age)
print(summary)  -- Name: Alice, Age: 30

-- Floating-point conversions
local pi_value = math.pi
print(string.format("Pi: %.2f", pi_value))        -- Pi: 3.14
print(string.format("Pi: %.6f", pi_value))        -- Pi: 3.141593
print(string.format("Pi: %e", pi_value))          -- Pi: 3.141593e+00

-- Integer conversions in different bases
local answer = 42
print(string.format("Decimal: %d", answer))    -- Decimal: 42
print(string.format("Hex: %x", answer))        -- Hex: 2a
print(string.format("Hex: %X", answer))        -- Hex: 2A
print(string.format("Octal: %o", answer))      -- Octal: 52

-- Field width and alignment for strings
local word = "Hello"
print(string.format("String: '%s'", word))           -- String: 'Hello'
print(string.format("String: '%-10s'", word))        -- String: 'Hello     '
print(string.format("String: '%10s'", word))         -- String: '     Hello'

-- %c turns a character code into the character
print(string.format("Character: %c", 65))   -- Character: A

6.2 自定义格式化函数

lua
-- Formats `amount` with two decimals, prefixed by a currency symbol.
-- @param amount    number to format
-- @param currency  optional symbol; "$" is used when it is nil or false
-- @return the formatted string
function format_currency(amount, currency)
    if not currency then
        currency = "$"
    end
    local money_format = "%s%.2f"
    return money_format:format(currency, amount)
end

print(format_currency(1234.56))      -- $1234.56
print(format_currency(1234.56, "¥")) -- ¥1234.56

-- Formats a time of day as HH:MM:SS, zero-padding each field to two digits.
-- @return the formatted string
function format_time(hours, minutes, seconds)
    local clock_format = "%02d:%02d:%02d"
    return clock_format:format(hours, minutes, seconds)
end

print(format_time(9, 5, 3))   -- 09:05:03
print(format_time(14, 30, 45)) -- 14:30:45

-- Formats a byte count using the largest unit (B, KB, MB, GB, TB) for which
-- the value is still at least 1, dividing by 1024 per step.
-- Plain byte counts are printed with %d; scaled values with two decimals.
-- @param bytes  size in bytes
-- @return the formatted string, e.g. "1.50 KB"
function format_file_size(bytes)
    local units = {"B", "KB", "MB", "GB", "TB"}
    local value = bytes
    local index = 1

    -- At most #units - 1 promotions, so the index never passes the last unit
    for _ = 1, #units - 1 do
        if not (value >= 1024) then
            break
        end
        value = value / 1024
        index = index + 1
    end

    local unit = units[index]
    if index ~= 1 then
        return string.format("%.2f %s", value, unit)
    end
    return string.format("%d %s", value, unit)
end

print(format_file_size(1024))      -- 1.00 KB
print(format_file_size(1536))      -- 1.50 KB
print(format_file_size(1048576))   -- 1.00 MB

7. 字符串转换

7.1 大小写转换

lua
local mixed_case = "Hello, Lua Programming!"

-- Upper-case copy
print(string.upper(mixed_case))  -- HELLO, LUA PROGRAMMING!

-- Lower-case copy
print(string.lower(mixed_case))  -- hello, lua programming!

-- Returns `str` with its first character upper-cased and every remaining
-- character lower-cased.
function capitalize(str)
    local head = string.sub(str, 1, 1)
    local tail = string.sub(str, 2)
    return string.upper(head) .. string.lower(tail)
end

print(capitalize("hello world"))  -- Hello world

-- Title case: upper-cases the first character of every alphanumeric word
-- and lower-cases the rest of that word.
-- @param str  the text to convert
-- @return the converted string (a single value)
function title_case(str)
    -- string.gsub also returns the replacement count. Keep only the string,
    -- otherwise print(title_case(s)) shows the count as a second value.
    local titled = string.gsub(str, "(%w)(%w*)", function(first, rest)
        return string.upper(first) .. string.lower(rest)
    end)
    return titled
end

print(title_case("hello lua programming"))  -- Hello Lua Programming

7.2 字符串反转

lua
local str = "Hello, World!"

-- Built-in reversal
print(string.reverse(str))  -- !dlroW ,olleH

-- Hand-written reversal, byte by byte from the end.
-- The characters are collected in a table and joined once with
-- table.concat, which avoids building a new intermediate string on every
-- iteration.
-- @param str  the string to reverse
-- @return the reversed string
function reverse_string(str)
    local chars = {}
    for i = #str, 1, -1 do
        chars[#chars + 1] = string.sub(str, i, i)
    end
    return table.concat(chars)
end

print(reverse_string(str))  -- !dlroW ,olleH

7.3 字符串编码转换

lua
-- Codes to characters, and characters back to codes
print(string.char(65, 66, 67))    -- ABC
print(string.byte("ABC", 1, 3))   -- 65 66 67

-- Round trip for a single character
local letter = "A"
local code = string.byte(letter)
print(code)                       -- 65
print(string.char(code))          -- A

-- Returns an array holding the numeric code of every byte of `str`,
-- in order.
function string_to_ascii(str)
    local codes = {}
    local index = 1
    while index <= #str do
        codes[index] = string.byte(str, index)
        index = index + 1
    end
    return codes
end

local hello_codes = string_to_ascii("Hello")
for position, code in ipairs(hello_codes) do
    print(position, code, string.char(code))
end

8. 模式匹配详解

8.1 基本模式字符

lua
local sample = "Hello123World456"

-- Character classes, each matched greedily with +
print(string.match(sample, "%a+"))    -- Hello (letters)
print(string.match(sample, "%d+"))    -- 123 (digits)
print(string.match(sample, "%w+"))    -- Hello123World456 (alphanumerics)
print(string.match(sample, "%s+"))    -- nil (no whitespace present)

-- Reference table: class -> description
local class_descriptions = {
    ["%a"] = "字母",
    ["%d"] = "数字",
    ["%l"] = "小写字母",
    ["%u"] = "大写字母",
    ["%w"] = "字母数字",
    ["%s"] = "空白字符",
    ["%p"] = "标点符号",
    ["%c"] = "控制字符",
    ["%x"] = "十六进制数字"
}

-- pairs visits the entries in an unspecified order
for class, meaning in pairs(class_descriptions) do
    print(class .. ": " .. meaning)
end

8.2 量词和修饰符

lua
local sample = "aaabbbccc123"

-- Repetition operators applied to the single character "a"
print(string.match(sample, "a+"))     -- aaa (one or more, greedy)
print(string.match(sample, "a*"))     -- aaa (zero or more, greedy)
print(string.match(sample, "a?"))     -- a (zero or one)
print(string.match(sample, "a-"))     -- "" (zero or more, shortest match)

-- Character sets
print(string.match(sample, "[abc]+")) -- aaabbbccc
print(string.match(sample, "[^abc]+")) -- 123 (anything except a, b, c)
print(string.match(sample, "[a-z]+")) -- aaabbbccc

8.3 捕获和分组

lua
local address = "user@example.com"

-- Two captures: the text before and after "@"
local local_part, mail_domain = string.match(address, "([^@]+)@([^@]+)")
print(local_part, mail_domain)  -- user example.com

-- Three captures
local iso_date = "2023-12-25"
local yyyy, mm, dd = string.match(iso_date, "(%d+)-(%d+)-(%d+)")
print(yyyy, mm, dd)  -- 2023 12 25

-- Four captures in sequence: scheme, host, port digits, path
local full_url = "https://www.example.com:8080/path"
local scheme, hostname, port_digits, url_path = string.match(full_url, "([^:]+)://([^:/]+):?(%d*)(/.*)")
print(scheme, hostname, port_digits, url_path)  -- https www.example.com 8080 /path

9. 实际应用示例

9.1 日志解析

lua
-- Parses one access-log line of the form
--   <ip> - - [<timestamp>] "<request>" <status> <size>
-- @param log_line  the line to parse
-- @return a table with ip, timestamp, request (strings) and status, size
--         (numbers), or nil when the line does not match
function parse_apache_log(log_line)
    local ip, timestamp, request, status, size = string.match(
        log_line,
        '([%d%.]+) %- %- %[([^%]]+)%] "([^"]+)" (%d+) (%d+)'
    )

    -- A failed match leaves every capture nil
    if not ip then
        return nil
    end

    local entry = {}
    entry.ip = ip
    entry.timestamp = timestamp
    entry.request = request
    entry.status = tonumber(status)
    entry.size = tonumber(size)
    return entry
end

local sample_line = '192.168.1.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234'
local entry = parse_apache_log(sample_line)

if entry then
    for field, value in pairs(entry) do
        print(field, value)
    end
end

9.2 配置文件解析

lua
-- Parses INI-formatted text into a table of sections.
-- Blank lines and lines starting with "#" or ";" are ignored. Keys and
-- values are trimmed and stored as strings. "key = value" lines that appear
-- before the first section header are ignored.
-- @param ini_content  the INI text
-- @return table mapping section name -> table of key -> value
function parse_ini_string(ini_content)
    local config = {}
    local current_section = nil

    for line in string.gmatch(ini_content, "[^\r\n]+") do
        -- Trim leading and trailing whitespace
        line = string.match(line, "^%s*(.-)%s*$")

        -- Skip blank lines and comments
        if line ~= "" and not string.match(line, "^[#;]") then
            -- Section header: [name]
            local section = string.match(line, "^%[(.+)%]$")
            if section then
                current_section = section
                -- Reuse the table when a header is repeated, so keys that
                -- were already collected for this section are not discarded
                config[current_section] = config[current_section] or {}
            else
                -- key = value; everything after the first "=" is the value
                local key, value = string.match(line, "^([^=]+)=(.*)$")
                if key and value and current_section then
                    key = string.match(key, "^%s*(.-)%s*$")
                    value = string.match(value, "^%s*(.-)%s*$")
                    config[current_section][key] = value
                end
            end
        end
    end

    return config
end

local ini_content = [[
[database]
host = localhost
port = 3306
username = admin
password = secret

[server]
port = 8080
debug = true
]]

local config = parse_ini_string(ini_content)
print(config.database.host)     -- localhost
print(config.server.port)       -- 8080

9.3 URL解析

lua
-- Parses a URL of the form
--   protocol://host[:port][/path][?query][#fragment]
-- Lua patterns cannot mark a group as optional ("(...)?" means the group
-- followed by a literal "?"), so the URL is taken apart in steps rather
-- than with a single pattern.
-- @param url  the URL string
-- @return table with protocol, host, port (number or nil), path (defaults
--         to "/"), query (key -> value table) and fragment (or nil). When
--         the URL has no recognizable protocol or host, only the defaults
--         path = "/" and query = {} are set.
function parse_url(url)
    local result = { path = "/", query = {} }

    local protocol, rest = string.match(url, "^([^:]+)://(.*)$")
    if not protocol then
        return result
    end

    -- Peel off "#fragment" first, then "?query"; both parts are optional
    local before_fragment, fragment = string.match(rest, "^([^#]*)#?(.*)$")
    local location, query = string.match(before_fragment, "^([^?]*)%??(.*)$")

    -- What remains is host[:port][/path]; try the form with a path first
    local host, port, path = string.match(location, "^([^:/]+):?(%d*)(/.*)$")
    if not host then
        host, port = string.match(location, "^([^:/]+):?(%d*)$")
    end
    if not host then
        return result
    end

    result.protocol = protocol
    result.host = host
    if port ~= "" then
        result.port = tonumber(port)
    end
    if path then
        result.path = path
    end
    if fragment ~= "" then
        result.fragment = fragment
    end

    -- Query parameters: key=value pairs separated by "&"
    for key, value in string.gmatch(query, "([^&=]+)=([^&=]*)") do
        result.query[key] = value
    end

    return result
end

local url = "https://example.com:8080/path/to/page?name=John&age=30#section1"
local parsed = parse_url(url)

print("Protocol:", parsed.protocol)
print("Host:", parsed.host)
print("Port:", parsed.port)
print("Path:", parsed.path)
print("Fragment:", parsed.fragment)

for key, value in pairs(parsed.query) do
    print("Query " .. key .. ":", value)
end

10. 性能优化技巧

10.1 字符串连接优化

lua
-- Deliberately inefficient: appends to one growing string, creating a new
-- intermediate string on every iteration. Kept as the baseline for the
-- timing comparison.
-- @param n  number of pieces "string1" .. "stringN" to append
-- @return the concatenated string
local function slow_concat(n)
    local accumulated = ""
    local i = 1
    while i <= n do
        accumulated = accumulated .. "string" .. i
        i = i + 1
    end
    return accumulated
end

-- Efficient variant: collects the pieces in a table and joins them once
-- with table.concat.
-- @param n  number of pieces "string1" .. "stringN" to join
-- @return the concatenated string
local function fast_concat(n)
    local pieces = {}
    for i = 1, n do
        pieces[#pieces + 1] = "string" .. i
    end
    local joined = table.concat(pieces)
    return joined
end

-- Calls func(...) and measures the time it takes with os.clock.
-- @return the first result of `func`, then the elapsed os.clock time in
--         seconds
local function time_function(func, ...)
    local started_at = os.clock()
    local first_result = func(...)
    local elapsed = os.clock() - started_at
    return first_result, elapsed
end

-- Time both variants on the same number of pieces; only the elapsed times
-- are needed, so the returned strings are discarded
local iterations = 10000
local _, slow_time = time_function(slow_concat, iterations)
local _, fast_time = time_function(fast_concat, iterations)

local report_lines = {
    string.format("慢速连接: %.4f 秒", slow_time),
    string.format("快速连接: %.4f 秒", fast_time),
    string.format("性能提升: %.2f 倍", slow_time / fast_time),
}
for index = 1, #report_lines do
    print(report_lines[index])
end

10.2 模式匹配优化

lua
-- Shared pattern for a simple e-mail shape: local part, "@", and a domain
-- containing at least one dot. Lua has no compiled-pattern objects; keeping
-- the pattern in one local only gives every caller the same definition.
-- The ^ and $ anchors make the WHOLE string have to match. Without them,
-- any text that merely contains an address would pass validation.
local email_pattern = "^([%w%.%-_]+)@([%w%.%-]+%.%w+)$"

-- Returns true when `email` consists solely of text matching
-- `email_pattern`, false otherwise.
local function validate_email_cached(email)
    return string.match(email, email_pattern) ~= nil
end

-- Collects every run of digits in `text`, converted with tonumber, by
-- repeatedly calling string.find from just past the previous match.
-- @param text  the string to scan
-- @return array of numbers in order of appearance
local function extract_numbers_optimized(text)
    local numbers = {}

    local first, last = string.find(text, "%d+", 1)
    while first do
        numbers[#numbers + 1] = tonumber(string.sub(text, first, last))
        -- Resume the search right after the digits just consumed
        first, last = string.find(text, "%d+", last + 1)
    end

    return numbers
end

总结

Lua字符串操作功能强大且灵活:

  1. 多种创建方式 - 支持单引号、双引号和长字符串
  2. 丰富的操作函数 - 查找、替换、分割、格式化等
  3. 强大的模式匹配 - 支持类似正则表达式的Lua模式(语法更精简,不支持 | 分支,也不能对分组使用量词)
  4. 高效的处理方式 - 合理使用可以获得很好的性能
  5. 实用的应用场景 - 文本处理、数据解析、格式化等

掌握字符串操作是Lua编程的重要技能,在实际开发中应用广泛。

基于 MIT 许可发布