先要明白Linux下delete file只是对上层目录做改变:
目录文件的data block中存储了该目录下的所有目录项,每个目录项主要有2个域:inode number和filename。由于有
inode number,所以可以指向其它inode,也就能实现逐级搜索了,比如 /usr/local/src/linux-2.6.33/。
inode中包含file的多数属性,却把filename放在了目录文件中。因此如果普通用户居然能删root的文件,
那就看文件所在的目录是否对other开放了w权限,因为delete file是改变上层目录文件的内容,而不是file本身。
$ ll /usr/local|grep src
drwxr-xr-x. 6 root root 4096 2010-03-31 20:59 src
$ ll /usr/local/src/
-rw-r--r--. 1 cngrid cngrid 13864479 2009-12-01 20:15 all-20071007.tar.bz2
dr--r--r--. 23 cngrid cngrid 4096 2010-03-30 13:58 linux-2.6.33
-rw-r--r--. 1 cngrid cngrid 1294784 2010-03-31 18:52 linux-fetion-1.3-1.fc12.i686.rpm
由于src没有对普通用户开放w权限,所以即使cngrid是src/内文件的owner,也无法删除src内的文件。
/*
 * On-disk layout of an ext2 inode.
 *
 * The structure carries the file's attributes and block pointers but has no
 * name field. The __le16/__le32 members are, going by their type names,
 * stored little-endian on disk.
 */
struct ext2_inode {
	__le16	i_mode;		/* Mode bits of the file */
	__le16	i_uid;		/* Owner uid, low 16 bits */
	__le32	i_size;		/* File size in bytes */
	__le32	i_atime;	/* Time of last access */
	/*
	 * Labelled "Creation time" in the original listing.
	 * NOTE(review): presumably this is the inode change time (ctime) -- confirm.
	 */
	__le32	i_ctime;
	__le32	i_mtime;	/* Time of last modification */
	__le32	i_dtime;	/* Time of deletion */
	__le16	i_gid;		/* Group id, low 16 bits */
	__le16	i_links_count;	/* Link count */
	__le32	i_blocks;	/* Block count */
	__le32	i_flags;	/* File flags */

	/* First OS-dependent area: one 32-bit word per supported OS */
	union {
		struct {
			__le32	l_i_reserved1;
		} linux1;
		struct {
			__le32	h_i_translator;
		} hurd1;
		struct {
			__le32	m_i_reserved1;
		} masix1;
	} osd1;

	__le32	i_block[EXT2_N_BLOCKS];	/* Block pointers, EXT2_N_BLOCKS slots */
	__le32	i_generation;	/* File version (used for NFS) */
	__le32	i_file_acl;	/* File ACL */
	__le32	i_dir_acl;	/* Directory ACL */
	__le32	i_faddr;	/* Fragment address */

	/* Second OS-dependent area */
	union {
		struct {
			__u8	l_i_frag;	/* Fragment number */
			__u8	l_i_fsize;	/* Fragment size */
			__u16	i_pad1;
			__le16	l_i_uid_high;	/* High uid/gid halves; these two */
			__le16	l_i_gid_high;	/* fields used to be reserved2[0] */
			__u32	l_i_reserved2;
		} linux2;
		struct {
			__u8	h_i_frag;	/* Fragment number */
			__u8	h_i_fsize;	/* Fragment size */
			__le16	h_i_mode_high;
			__le16	h_i_uid_high;
			__le16	h_i_gid_high;
			__le32	h_i_author;
		} hurd2;
		struct {
			__u8	m_i_frag;	/* Fragment number */
			__u8	m_i_fsize;	/* Fragment size */
			__u16	m_pad1;
			__u32	m_i_reserved2[2];
		} masix2;
	} osd2;
};
i_block[] 就是多级寻址数组,容量为15。
前12个指针直接指向block,后3个指针分别对应1级、2级、3级间接寻址:
/*
 * Slot layout of ext2_inode.i_block[]: the direct pointers come first,
 * followed by one slot for each level of indirection.
 */
#define EXT2_NDIR_BLOCKS	12			/* count of direct block pointers */
#define EXT2_IND_BLOCK		EXT2_NDIR_BLOCKS	/* slot of the single-indirect pointer */
#define EXT2_DIND_BLOCK		(EXT2_IND_BLOCK + 1)	/* slot of the double-indirect pointer */
#define EXT2_TIND_BLOCK		(EXT2_DIND_BLOCK + 1)	/* slot of the triple-indirect pointer */
#define EXT2_N_BLOCKS		(EXT2_TIND_BLOCK + 1)	/* total number of slots */
/*
 * One entry in a directory's data block: it pairs an inode number with a
 * file name. The name buffer is declared at its maximum size, EXT2_NAME_LEN.
 */
#define EXT2_NAME_LEN	255

struct ext2_dir_entry {
	__le32	inode;			/* Inode number this name refers to */
	__le16	rec_len;		/* Length of this directory entry */
	__le16	name_len;		/* Length of the name */
	char	name[EXT2_NAME_LEN];	/* The file name itself */
};
inode中包含file的多数属性,却把filename放在了目录文件中。因此如果普通用户居然能删root的文件,
那就看文件所在的目录是否对other开放了w权限,因为delete file是改变上层目录文件的内容,而不是
file本身。
发现一些现象:
openoffice打开一个.doc,这边看着,桌面那边可以直接删除,等这边关闭后,没有save提示,直接退出,
.doc丢失。
smplayer看1.4G的movie,正看着,在xterm里可以用rm -rf 删除,ls看不见了,这边丝毫没影响,照样看,
只要别想着关了重来就一直能看到完。但用df却发现硬盘空闲空间并未增加,所以不是真删除。直到关了
smplayer,用df才发现空间释放了。
有时mldonkey下一个非常想看的电影,马上就要下完了所以就等着,见下完了马上浏览,发现是打着兰兰的
旗号的欧美烂片,马上删除。但是刚删后用df发现硬盘空余空间没增加,这说明mldonkey中有thread正在对
刚下完的文件执行一些很慢的操作,还没等执行完呢,我就把文件删了,但只要那个thread不退出,rm -rf
操作就不会反映到硬盘上。虽然ls看不到movie了,但没真正删除,直到过一段时间之后用df才发现硬盘空闲
空间变大了。
下了个ape,用xmms先听听,好就收藏,如果开头就好,那就直接mv到ape/中,这边继续听完,没影响。不
必等到听完后再mv,也不用先退出xmms再mv。
正用ftp下一个专辑,发现中途硬盘空间不够了,那只需直接mv到大的分区中就不用管了,不必中断ftp。
有人用apache server,不注意把log rm或者mv了,由于server没退出,继续往原来的log里write,所以硬盘空闲空
间在不断变小却还不能马上知道是什么进程在操作,直到在 /proc/<pid>/fd/ 中ll才找到原因。
可见Linux这种删除文件的方式有好处也有坏处。VFS中的dcache部分有这个处理:
首先,为了避免多个进程对同一个dentry删除多次,每个进程lookup操作时都要先dentry->d_count++。
这种方法很常见,kernel里到处都是这种方法。
其次,delete一个dentry,dentry->d_count > 1时,说明除了本进程之外还有其它进程在使用,不能删除
硬盘上的文件,所以只是把该dentry从hash queue中删除,这样其它进程VFS lookup就找不到它了。这样
还不够,还要防止其它进程此后从硬盘上读取数据重新组装dentry,否则会导致该dentry永远无法删除。
dentry->d_count == 1时,本进程本来可以删除该dentry对应的文件,但为了保证开头说的那些功能,这里
也不能删硬盘上的文件,kernel 为此给dentry引入了一个negative状态: 只删除dentry->d_inode,然后
dentry->d_inode = NULL; 这样虽然硬盘上文件还在,为了加快其它进程的lookup,该dentry还允许留在
hash queue中,但不能允许其它进程访问硬盘上的文件,比如sys_open()要返回error,negative状态的引
入就使得其它进程无法通过d_inode访问硬盘上的文件,达到目的了。
dentry除了negative状态之外,还有两种状态:
in use: dentry->d_count > 0 && dentry->d_inode != NULL
unused: dentry->d_count == 0 && dentry->d_inode != NULL
negative和unused状态的dentry在kernel回收内存时可能被释放掉,而in use状态的是不能被回收的。
看看VFS中这部分的实现,注释说的很清楚:
186 /*
187 * This is dput
188 *
189 * This is complicated by the fact that we do not want to put
190 * dentries that are no longer on any hash chain on the unused
191 * list: we'd much rather just get rid of them immediately.
192 *
193 * However, that implies that we have to traverse the dentry
194 * tree upwards to the parents which might _also_ now be
195 * scheduled for deletion (it may have been only waiting for
196 * its last child to go away).
197 *
198 * This tail recursion is done by hand as we don't want to depend
199 * on the compiler to always get this right (gcc generally doesn't).
200 * Real recursion would eat up our stack space.
201 */
202
203 /*
204 * dput - release a dentry
205 * @dentry: dentry to release
206 *
207 * Release a dentry. This will drop the usage count and if appropriate
208 * call the dentry unlink method as well as removing it from the queues and
209 * releasing its resources. If the parent dentries were scheduled for release
210 * they too may now get deleted.
211 *
212 * no dcache lock, please.
213 */
214
/*
 * dput() drops one reference on @dentry. If the count reaches zero, the
 * dentry is either left hashed and placed on the LRU list, or it is
 * unhashed/killed, in which case the loop repeats on the dentry that
 * d_kill() hands back.
 */
215 void dput(struct dentry *dentry)
216 {
217 if (!dentry) /* a NULL dentry is accepted and ignored */
218 return;
219
220 repeat:
221 if (atomic_read(&dentry->d_count) == 1) /* this put may be the final one */
222 might_sleep(); /* NOTE(review): presumably flags that the final-put path may sleep -- confirm */
223 if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock)) /* NOTE(review): presumably non-zero only when the count hit zero, with dcache_lock then held -- the unlocks below rely on that */
224 return;
225
226 spin_lock(&dentry->d_lock);
227 if (atomic_read(&dentry->d_count)) { /* re-check under d_lock: count is non-zero again, so leave the dentry alone */
228 spin_unlock(&dentry->d_lock);
229 spin_unlock(&dcache_lock);
230 return;
231 }
232
233 /*
234 * AV: ->d_delete() is _NOT_ allowed to block now.
235 */
236 if (dentry->d_op && dentry->d_op->d_delete) { /* optional per-dentry d_delete hook */
237 if (dentry->d_op->d_delete(dentry)) /* non-zero return: unhash and kill instead of caching */
238 goto unhash_it;
239 }
240 /* Unreachable? Get rid of it */
241 if (d_unhashed(dentry))
242 goto kill_it;
243 if (list_empty(&dentry->d_lru)) { /* not on the LRU list yet */
244 dentry->d_flags |= DCACHE_REFERENCED;
245 dentry_lru_add(dentry);
246 }
247 spin_unlock(&dentry->d_lock); /* dentry stays hashed; release both locks and finish */
248 spin_unlock(&dcache_lock);
249 return;
250
251 unhash_it:
252 __d_drop(dentry); /* sets DCACHE_UNHASHED and removes the dentry from its hash chain */
253 kill_it:
254 /* if dentry was on the d_lru list delete it from there */
255 dentry_lru_del(dentry);
256 dentry = d_kill(dentry); /* NOTE(review): presumably returns the parent (or NULL), per the comment about walking up the tree -- confirm */
257 if (dentry) /* hand-rolled tail recursion: repeat for the returned dentry */
258 goto repeat;
259 }
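dput() -> d_delete()
(注意:dput()里调用的是dentry->d_op->d_delete这个由文件系统提供的方法;下面列出的d_delete()是fs/dcache.c中的另一个同名函数,
由vfs_unlink()等在unlink成功之后调用,两者不要混淆。)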
1487 /*
1488 * When a file is deleted, we have two options:
1489 * - turn this dentry into a negative dentry
1490 * - unhash this dentry and free it.
1491 *
1492 * Usually, we want to just turn this into
1493 * a negative dentry, but if anybody else is
1494 * currently using the dentry or the inode
1495 * we can't do that and we fall back on removing
1496 * it from the hash queues and waiting for
1497 * it to be deleted later when it has no users
1498 */
1499
1500 /**
1501 * d_delete - delete a dentry
1502 * @dentry: The dentry to delete
1503 *
1504 * Turn the dentry into a negative dentry if possible, otherwise
1505 * remove it from the hash queues so it can be deleted later
1506 */
1507
/*
 * d_delete() handles the dcache side of removing a name. If the caller holds
 * the only reference, the inode is detached via dentry_iput(), leaving a
 * negative dentry; otherwise the dentry is unhashed so later lookups miss it.
 * fsnotify_nameremove() is called on both paths.
 */
1508 void d_delete(struct dentry * dentry)
1509 {
1510 int isdir = 0;
1511 /*
1512 * Are we the only user?
1513 */
1514 spin_lock(&dcache_lock);
1515 spin_lock(&dentry->d_lock);
1516 isdir = S_ISDIR(dentry->d_inode->i_mode); /* NOTE(review): d_inode is dereferenced without a NULL check -- assumes a positive dentry */
1517 if (atomic_read(&dentry->d_count) == 1) { // into negative
1518 dentry_iput(dentry); /* clears d_inode and releases both d_lock and dcache_lock (see its __releases annotations) */
1519 fsnotify_nameremove(dentry, isdir);
1520 return; /* no unlock needed here: dentry_iput() already dropped the locks */
1521 }
1522
1523 if (!d_unhashed(dentry)) /* other users remain: only remove it from the hash */
1524 __d_drop(dentry); // unhash
1525
1526 spin_unlock(&dentry->d_lock);
1527 spin_unlock(&dcache_lock);
1528
1529 fsnotify_nameremove(dentry, isdir);
1530 }
dput() -> d_delete() -> dentry_iput()
/*
 * Detach the inode from a dentry and drop the dentry's reference on it.
 *
 * Entered with dentry->d_lock and dcache_lock held; both are released on
 * every path. When the dentry has no inode, unlocking is all that happens.
 * Otherwise the inode is released through the filesystem's d_iput() hook
 * if one is defined, and through iput() if not.
 */
static void dentry_iput(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dcache_lock)
{
	struct inode *victim = dentry->d_inode;

	if (victim) {
		/* Turn the dentry negative while both locks are still held. */
		dentry->d_inode = NULL;
		list_del_init(&dentry->d_alias);
	}

	spin_unlock(&dentry->d_lock);
	spin_unlock(&dcache_lock);

	if (!victim)
		return;

	/* No links left on the inode: send the inode-removal notification. */
	if (!victim->i_nlink)
		fsnotify_inoderemove(victim);

	if (dentry->d_op && dentry->d_op->d_iput)
		dentry->d_op->d_iput(dentry, victim);
	else
		iput(victim);
}
dput() -> d_delete() -> __d_drop()
/**
* d_drop - drop a dentry
* @dentry: dentry to drop
*
* d_drop() unhashes the entry from the parent dentry hashes, so that it won't
* be found through a VFS lookup any more. Note that this is different from
* deleting the dentry - d_delete will try to mark the dentry negative if
* possible, giving a successful _negative_ lookup, while d_drop will
* just make the cache lookup fail.
*
* d_drop() is used mainly for stuff that wants to invalidate a dentry for some
* reason (NFS timeouts or autofs deletes).
*
* __d_drop requires dentry->d_lock.
*/
static inline void __d_drop(struct dentry *dentry)
{
	/* Already marked unhashed: nothing to do. */
	if (dentry->d_flags & DCACHE_UNHASHED)
		return;

	/* Mark it unhashed, then remove it from its d_hash chain. */
	dentry->d_flags |= DCACHE_UNHASHED;
	hlist_del_rcu(&dentry->d_hash);
}