File Search
Search for files and content within the sandbox environment with comprehensive filtering options, content matching, metadata-based search, and advanced query capabilities for efficient file discovery.
🔍 Advanced Search Capabilities
File search supports content-based searching, metadata filtering, regex patterns, and multi-criteria search to efficiently locate files and data within the sandbox environment.
Overview
The File Search tool provides powerful search capabilities within the sandbox environment, supporting filename patterns, content matching, metadata filtering, and advanced search criteria for comprehensive file discovery and content analysis.
Key Features
- Content Searching - Search within file contents using text and regex patterns
- Metadata Filtering - Filter by file size, date, type, and properties
- Pattern Matching - Advanced filename and path pattern matching
- Multi-Criteria Search - Combine multiple search criteria for precise results
- Performance Optimization - Indexed searching and result caching for large datasets
Methods
fileSearch
Search for files and content in the sandbox environment.
| Parameter | Type | Required | Description |
|---|---|---|---|
| searchPath | String | No | Base path to search in (default: '/sandbox') |
| query | String | No | Text to search for in file contents |
| filePattern | String | No | File name pattern or glob expression |
| contentPattern | String | No | Regex pattern for content matching |
| fileTypes | Array | No | Filter by file extensions (e.g., ['.txt', '.csv']) |
| sizeRange | Object | No | File size range filter |
| dateRange | Object | No | File date range filter |
| recursive | Boolean | No | Search subdirectories recursively (default: true) |
| maxResults | Number | No | Maximum number of results to return (default: 100) |
| includeContent | Boolean | No | Include matched content snippets (default: false) |
```json
{
  "searchPath": "/sandbox/data",
  "query": "error analysis",
  "filePattern": "*.log",
  "sizeRange": {
    "min": 1024,
    "max": 10485760
  },
  "recursive": true,
  "maxResults": 50,
  "includeContent": true
}
```
Output:
- success (Boolean) - Search operation success status
- totalResults (Number) - Total number of matching results
- returnedResults (Number) - Number of results returned in the response
- searchTime (Number) - Search operation duration in milliseconds
- results (Array) - Array of matching files and content
  - filePath (String) - Path to the matching file
  - fileName (String) - File name
  - fileSize (Number) - File size in bytes
  - lastModified (String) - File modification timestamp
  - matches (Array) - Content matches (present when a content search is performed)
    - lineNumber (Number) - Line number of the match
    - content (String) - Matched content snippet
    - context (String) - Surrounding context
- searchCriteria (Object) - Summary of the applied search criteria
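For example, a minimal sketch that walks this response shape (assuming the same `fileSearch` binding used throughout the examples below; the search parameters themselves are illustrative):

```python
# Walk the documented response shape from a content search.
result = fileSearch({
    "searchPath": "/sandbox/logs",
    "query": "timeout",
    "includeContent": True
})

if result["success"]:
    print(f"{result['returnedResults']} of {result['totalResults']} matches "
          f"in {result['searchTime']}ms")
    for entry in result["results"]:
        print(f"{entry['filePath']} ({entry['fileSize']} bytes)")
        # 'matches' is only populated when a content search was performed
        for match in entry.get("matches", []):
            print(f"  line {match['lineNumber']}: {match['content']}")
```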
Content-Based Searching
Text Content Search
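A minimal sketch of the two content-search modes, using the parameter names from the table above: `query` for literal text and `contentPattern` for a regex (the paths and patterns here are illustrative):

```python
# Plain-text search: 'query' matches literal text inside file contents.
text_result = fileSearch({
    "searchPath": "/sandbox/data",
    "query": "error analysis",
    "fileTypes": [".log", ".txt"],
    "includeContent": True
})

# Regex search: 'contentPattern' matches a pattern instead of literal text.
regex_result = fileSearch({
    "searchPath": "/sandbox/data",
    "contentPattern": r"ERROR\s+\d{4}-\d{2}-\d{2}",
    "fileTypes": [".log"],
    "includeContent": True
})

for entry in text_result.get("results", []):
    print(entry["filePath"])
```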
File-Based Searching
Filename and Path Search
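A short sketch combining a glob-style `filePattern` with the metadata filters documented above; the filename pattern is hypothetical, `sizeRange` uses the documented `min`/`max` keys, and `dateRange` follows the `start` convention used in the configuration example later in this page:

```python
import datetime

# Glob-style filename search, narrowed by metadata filters.
result = fileSearch({
    "searchPath": "/sandbox",
    "filePattern": "report_*.csv",   # hypothetical filename pattern
    "sizeRange": {"min": 1024},      # at least 1 KB
    "dateRange": {
        # files modified within the last 7 days
        "start": (datetime.datetime.now() - datetime.timedelta(days=7)).isoformat()
    },
    "recursive": True,
    "maxResults": 25
})

if result["success"]:
    for entry in result["results"]:
        print(entry["fileName"], entry["lastModified"])
```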
Advanced Search Operations
Multi-Criteria Search
```python
import datetime

def comprehensive_file_search(search_config):
"""Perform comprehensive search with multiple criteria."""
search_results = {}
for search_name, criteria in search_config.items():
print(f"🔍 Executing search: {search_name}")
# Build search parameters
search_params = {
"searchPath": criteria.get('path', '/sandbox'),
"recursive": criteria.get('recursive', True),
"maxResults": criteria.get('max_results', 100)
}
# Add optional parameters
if 'query' in criteria:
search_params['query'] = criteria['query']
if 'file_pattern' in criteria:
search_params['filePattern'] = criteria['file_pattern']
if 'content_pattern' in criteria:
search_params['contentPattern'] = criteria['content_pattern']
if 'file_types' in criteria:
search_params['fileTypes'] = criteria['file_types']
if 'size_range' in criteria:
search_params['sizeRange'] = criteria['size_range']
if 'date_range' in criteria:
search_params['dateRange'] = criteria['date_range']
if 'include_content' in criteria:
search_params['includeContent'] = criteria['include_content']
# Execute search
result = fileSearch(search_params)
if result['success']:
search_results[search_name] = {
"success": True,
"matches": result['totalResults'],
"files": result['results'],
"search_time": result['searchTime'],
"criteria": criteria
}
print(f" ✅ Found {result['totalResults']} matches in {result['searchTime']}ms")
else:
search_results[search_name] = {
"success": False,
"error": result.get('error'),
"criteria": criteria
}
print(f" ❌ Search failed: {result.get('error')}")
return search_results
def generate_search_report(search_results):
"""Generate comprehensive search report."""
report = {
"summary": {
"total_searches": len(search_results),
"successful_searches": len([r for r in search_results.values() if r['success']]),
"total_matches": sum(r.get('matches', 0) for r in search_results.values() if r['success']),
"total_search_time": sum(r.get('search_time', 0) for r in search_results.values() if r['success'])
},
"detailed_results": search_results
}
print("\n📊 Search Report Summary:")
print(f" Searches executed: {report['summary']['total_searches']}")
print(f" Successful: {report['summary']['successful_searches']}")
print(f" Total matches: {report['summary']['total_matches']}")
print(f" Total search time: {report['summary']['total_search_time']}ms")
# Top performing searches
successful_searches = [
(name, result) for name, result in search_results.items()
if result['success']
]
if successful_searches:
print("\n🏆 Top searches by matches:")
top_searches = sorted(successful_searches, key=lambda x: x[1]['matches'], reverse=True)[:5]
for i, (name, result) in enumerate(top_searches, 1):
print(f" {i}. {name}: {result['matches']} matches")
return report
# Usage with comprehensive search configuration
search_configuration = {
"error_analysis": {
"query": "error",
"file_types": [".log", ".txt"],
"path": "/sandbox/logs",
"include_content": True,
"max_results": 50
},
"large_data_files": {
"file_pattern": "*data*",
"file_types": [".csv", ".xlsx", ".json"],
"size_range": {"min": 5 * 1024 * 1024}, # > 5MB
"max_results": 30
},
"recent_python_files": {
"file_types": [".py", ".ipynb"],
"date_range": {
"start": (datetime.datetime.now() - datetime.timedelta(days=30)).isoformat()
},
"max_results": 100
},
"config_with_passwords": {
"content_pattern": r"password\s*[:=]\s*['\"][^'\"]+['\"]",
"file_types": [".json", ".yaml", ".ini", ".conf"],
"include_content": True,
"max_results": 20
},
"temporary_files": {
"file_pattern": "*.tmp",
"path": "/sandbox/temp",
"recursive": True,
"max_results": 200
}
}
comprehensive_results = comprehensive_file_search(search_configuration)
search_report = generate_search_report(comprehensive_results)
```
Search Performance and Optimization
Search Result Caching
```python
import hashlib
import time

def cached_search_manager():
    """Manage cached search results for performance."""
    search_cache = {}

    def execute_search_with_cache(search_key, search_params, cache_duration_minutes=30):
        """Execute search with result caching."""
# Generate cache key
cache_key = hashlib.md5(str(sorted(search_params.items())).encode()).hexdigest()
current_time = time.time()
# Check cache
if cache_key in search_cache:
cached_result = search_cache[cache_key]
if current_time - cached_result['timestamp'] < cache_duration_minutes * 60:
print(f"🚀 Using cached result for: {search_key}")
return cached_result['result']
# Execute search
print(f"🔍 Executing fresh search: {search_key}")
result = fileSearch(search_params)
# Cache result
if result['success']:
search_cache[cache_key] = {
'result': result,
'timestamp': current_time,
'search_key': search_key
}
return result
def clear_expired_cache():
"""Clear expired cache entries."""
current_time = time.time()
expired_keys = [
key for key, data in search_cache.items()
if current_time - data['timestamp'] > 1800 # 30 minutes
]
for key in expired_keys:
del search_cache[key]
print(f"🧹 Cleared {len(expired_keys)} expired cache entries")
def get_cache_stats():
"""Get cache statistics."""
return {
"total_entries": len(search_cache),
"entries": [
{
"search_key": data['search_key'],
"age_minutes": (time.time() - data['timestamp']) / 60,
"results": data['result']['totalResults']
}
for data in search_cache.values()
]
}
return execute_search_with_cache, clear_expired_cache, get_cache_stats
# Usage
cached_search, clear_cache, cache_stats = cached_search_manager()
# Perform searches with caching
result1 = cached_search("error_logs", {
"query": "ERROR",
"fileTypes": [".log"],
"recursive": True
})
# Same search will use cache
result2 = cached_search("error_logs", {
"query": "ERROR",
"fileTypes": [".log"],
"recursive": True
})
# Check cache statistics
stats = cache_stats()
print(f"Cache contains {stats['total_entries']} entries")Error Handling
Common Search Issues
| Error Type | Cause | Resolution |
|---|---|---|
| Path Not Found | Search path doesn't exist | Verify path and check permissions |
| Permission Denied | Insufficient read permissions | Check directory and file permissions |
| Pattern Too Broad | Search pattern matches too many files | Refine search criteria and add filters |
| Timeout Error | Search operation takes too long | Use smaller search scope or pagination |
| Memory Limit | Too many results in memory | Reduce maxResults or use streaming |
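For the Timeout and Memory Limit rows above, one mitigation is to break a broad search into several narrower ones. A hedged sketch under the assumption that no explicit pagination cursor is available, so scope is split by subdirectory instead (the subdirectory names are hypothetical):

```python
# Mitigation sketch for timeout/memory errors: narrow the scope per call
# rather than issuing one broad search over the whole sandbox.
subpaths = ["/sandbox/logs", "/sandbox/data", "/sandbox/temp"]

all_results = []
for path in subpaths:
    result = fileSearch({
        "searchPath": path,
        "query": "error",
        "recursive": True,
        "maxResults": 25  # keep each response small
    })
    if result["success"]:
        all_results.extend(result["results"])
    else:
        print(f"⚠️ Search under {path} failed: {result.get('error')}")

print(f"Collected {len(all_results)} results across {len(subpaths)} scopes")
```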
Robust Search Implementation
```python
def robust_search_with_fallbacks(primary_search, fallback_searches=None):
"""Perform search with fallback strategies."""
# Try primary search first
try:
result = fileSearch(primary_search)
if result['success'] and result['totalResults'] > 0:
return {
"success": True,
"search_type": "primary",
"result": result
}
elif result['success'] and result['totalResults'] == 0:
print("⚠️ Primary search returned no results, trying fallbacks...")
else:
print(f"⚠️ Primary search failed: {result.get('error')}")
except Exception as e:
print(f"💥 Primary search exception: {str(e)}")
# Try fallback searches
if fallback_searches:
for i, fallback in enumerate(fallback_searches):
print(f"🔄 Trying fallback {i+1}: {fallback.get('description', 'Unknown')}")
try:
fallback_result = fileSearch(fallback['params'])
if fallback_result['success'] and fallback_result['totalResults'] > 0:
return {
"success": True,
"search_type": f"fallback_{i+1}",
"description": fallback.get('description'),
"result": fallback_result
}
except Exception as e:
print(f"💥 Fallback {i+1} exception: {str(e)}")
# All searches failed
return {
"success": False,
"error": "All search strategies failed",
"attempted": ["primary"] + [f"fallback_{i+1}" for i in range(len(fallback_searches or []))]
}
# Usage with fallback strategies
primary_search_params = {
"query": "specific error message",
"fileTypes": [".log"],
"contentPattern": r"ERROR.*database.*connection",
"recursive": True
}
fallback_strategies = [
{
"description": "Broader error search",
"params": {
"query": "error",
"fileTypes": [".log"],
"recursive": True
}
},
{
"description": "Any log files",
"params": {
"filePattern": "*.log",
"recursive": True
}
},
{
"description": "Any text files with error",
"params": {
"query": "error",
"fileTypes": [".txt", ".log", ".out"],
"recursive": True
}
}
]
robust_result = robust_search_with_fallbacks(primary_search_params, fallback_strategies)
if robust_result['success']:
print(f"✅ Search successful using {robust_result['search_type']}")
print(f" Found {robust_result['result']['totalResults']} results")
else:
print(f"❌ All search attempts failed")Related Tools
List Files
Browse directory contents to identify search targets
Read File
Read contents of files found through search operations
File Metadata
Extract detailed metadata from search results
Next Steps: Combine with Read File to analyze found content, or use File Metadata for detailed file analysis.